_bol.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
import re

import requests
from bs4 import BeautifulSoup
from isbnlib.dev import stdmeta
from isbnlib.dev._exceptions import NoDataForSelectorError
  5. class Book:
  6. LANGUAGE_MAP = {"Engels": "en", "Nederlands": "nl", "Duits": "de"}
  7. def __init__(self, isbn, html):
  8. self.html = html
  9. self.specs = {}
  10. self.isbn = isbn
  11. self.parse_specs()
  12. @property
  13. def title(self):
  14. return self.html.find("h1", class_="pdp-header__title").text.strip()
  15. @property
  16. def authors(self):
  17. for key in ["Auteur", "Redacteur"]:
  18. authors = self.specs.get(key, None)
  19. if authors:
  20. return authors.split("\n")
  21. return []
  22. @property
  23. def publisher(self):
  24. return self.specs.get("Uitgever", None)
  25. @property
  26. def year(self):
  27. date = self.specs.get("Verschijningsdatum", None)
  28. if date:
  29. try:
  30. year = re.match(r'.*(\d{4}).*', date).group(1)
  31. except:
  32. return None
  33. else:
  34. return int(year)
  35. return None
  36. @property
  37. def language(self):
  38. lang = self.specs.get("Taal", None)
  39. if lang in self.LANGUAGE_MAP:
  40. return self.LANGUAGE_MAP[lang]
  41. else:
  42. print(f"Warning: language {lang} not found in mapping")
  43. return lang
  44. def as_record(self):
  45. return {"Title": self.title,
  46. "Authors": self.authors,
  47. "Year": self.year,
  48. "Publisher": self.publisher,
  49. "Language": self.language,
  50. "ISBN-13": self.isbn}
  51. def parse_specs(self):
  52. specs_lists = self.html.find_all("dl", class_="specs__list")
  53. for specs_list in specs_lists:
  54. keys = [el.text.strip() for el in specs_list.find_all("dt")]
  55. values = [el.text.strip() for el in specs_list.find_all("dd")]
  56. specs_dict = dict(zip(keys, values))
  57. self.specs.update(specs_dict)
  58. def __str__(self):
  59. return f"""Title: {self.title}
  60. Author: "; ".join({self.authors})
  61. Publisher: {self.publisher}
  62. Year: {self.year}
  63. Language: {self.language}
  64. ISBN: {self.isbn}"""
  65. def __repr__(self):
  66. return self.__str__()
  67. @classmethod
  68. def find(cls, isbn):
  69. url = "https://www.bol.com/nl/rnwy/search.html"
  70. r = requests.get(url, params={"Ntt": isbn})
  71. if r.status_code == 200:
  72. soup = BeautifulSoup(r.text, 'html.parser')
  73. return Book(isbn, soup)
  74. else:
  75. return None
  76. def query(isbn):
  77. book = Book.find(isbn)
  78. if book:
  79. return book.as_record()
  80. else:
  81. raise NoDataForSelectorError(isbn)