_bol.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
import re

import requests
from bs4 import BeautifulSoup
from isbnlib.dev import stdmeta
from isbnlib.dev._exceptions import NoDataForSelectorError
  5. class Book:
  6. LANGUAGE_MAP = {"Engels": "en", "Nederlands": "nl", "Duits": "de"}
  7. def __init__(self, isbn, html):
  8. self.html = html
  9. self.specs = {}
  10. self.isbn = isbn
  11. self.parse_specs()
  12. @property
  13. def title(self):
  14. return self.html.find("h1", class_="pdp-header__title").text.strip()
  15. @property
  16. def authors(self):
  17. authors = self.specs.get("Auteur", None)
  18. if authors:
  19. return self.specs["Auteur"].split("\n")
  20. return None
  21. @property
  22. def publisher(self):
  23. return self.specs.get("Uitgever", None)
  24. @property
  25. def year(self):
  26. date = self.specs.get("Verschijningsdatum", None)
  27. if date:
  28. try:
  29. year = re.match(r'.*(\d{4}).*', date).group(1)
  30. except:
  31. return None
  32. else:
  33. return int(year)
  34. return None
  35. @property
  36. def language(self):
  37. lang = self.specs.get("Taal", None)
  38. if lang in self.LANGUAGE_MAP:
  39. return self.LANGUAGE_MAP[lang]
  40. else:
  41. print(f"Warning: language {lang} not found in mapping")
  42. return lang
  43. def as_record(self):
  44. return {"Title": self.title,
  45. "Authors": self.authors,
  46. "Year": self.year,
  47. "Publisher": self.publisher,
  48. "Language": self.language,
  49. "ISBN-13": self.isbn}
  50. def parse_specs(self):
  51. specs_lists = self.html.find_all("dl", class_="specs__list")
  52. for specs_list in specs_lists:
  53. keys = [el.text.strip() for el in specs_list.find_all("dt")]
  54. values = [el.text.strip() for el in specs_list.find_all("dd")]
  55. specs_dict = dict(zip(keys, values))
  56. self.specs.update(specs_dict)
  57. def __str__(self):
  58. return f"""Title: {self.title}
  59. Author: "; ".join({self.authors})
  60. Publisher: {self.publisher}
  61. Year: {self.year}
  62. Language: {self.language}
  63. ISBN: {self.isbn}"""
  64. def __repr__(self):
  65. return self.__str__()
  66. @classmethod
  67. def find(cls, isbn):
  68. url = "https://www.bol.com/nl/rnwy/search.html"
  69. r = requests.get(url, params={"Ntt": isbn})
  70. if r.status_code == 200:
  71. soup = BeautifulSoup(r.text, 'html.parser')
  72. return Book(isbn, soup)
  73. else:
  74. return None
  75. def query(isbn):
  76. book = Book.find(isbn)
  77. if book:
  78. return book.as_record()
  79. else:
  80. raise NoDataForSelectorError(isbn)