voedingscentrum.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. #!./python_env/bin/python3
  2. from argparse import ArgumentParser
  3. from datetime import date
  4. from getpass import getuser
  5. import re
  6. from xml.etree import ElementTree
  7. from bs4 import BeautifulSoup
  8. import requests
  9. import psycopg2
  10. parser = ArgumentParser(description="Tool om data uit voedingscentrum.nl te importeren in PostgreSQL.")
  11. parser.add_argument('--email', type=str, required=True, help="Je emailadres voor voedingscentrum.nl")
  12. parser.add_argument('--password', type=str, required=True, help="Je wachtwoord voor voedingscentrum.nl")
  13. parser.add_argument('--dbhost', type=str, default='localhost', help="Je PostgreSQL hostname (standaard localhost)")
  14. parser.add_argument('--dbuser', type=str, default=getuser(), help=f"Je PostgreSQL username (standaard {getuser()})")
  15. parser.add_argument('--dbpassword', type=str, default=None, help="Je PostgreSQL password")
  16. parser.add_argument('--dbport', type=int, default=5432, help="Je PostgreSQL poort (standaard 5432)")
  17. parser.add_argument('--dbname', type=str, default='voeding', help="De naam van je PostgreSQL database (standaard voeding)")
  18. parser.add_argument('--from-date', type=str, help="Datum vanaf wanneer je gegevens wilt ophalen")
  19. parser.add_argument('--to-date', type=str, help="Datum tot wanneer je gegevens wilt ophalen")
  20. args = parser.parse_args()
  21. def parse_date(date_string):
  22. matches = re.match(r'(\d{4})-(\d{2})-(\d{2})', date_string)
  23. if matches:
  24. year, month, day = map(int, matches.groups())
  25. return date(year, month, day)
  26. matches = re.match(r'(\d{1,2})-(\d{1,2})-(\d{4})', date_string)
  27. if matches:
  28. day, month, year = map(int, matches.groups())
  29. return date(year, month, day)
  30. def snake_case(s):
  31. s = re.sub(r'([a-z])([A-Z])', r'\1_\2', s)
  32. return s.lower()
  33. def download_data():
  34. # Laad de startpagina en haal de verborgen form-data op
  35. s = requests.Session()
  36. r = s.get("https://mijn.voedingscentrum.nl/nl/login/")
  37. soup = BeautifulSoup(r.text, 'html.parser')
  38. data = {element['name']: element.get('value', None) for element in soup.find_all('input')}
  39. # Voeg persoonlijke gegevens toe om in te loggen
  40. data['ctl00$ctl00$main$ctl00$txtUser'] = args.email
  41. data['ctl00$ctl00$main$ctl00$txtPassword'] = args.password
  42. data['__EVENTTARGET'] = 'ctl00$ctl00$main$ctl00$lbLogin'
  43. data.pop('ctl00$ctl00$main$ctl00$cbxRemember')
  44. # Log in
  45. s.post("https://mijn.voedingscentrum.nl/nl/login/", data=data)
  46. # Laad de overichtpagina en haal de verborgen form-data op
  47. r = s.get("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/")
  48. soup = BeautifulSoup(r.text, 'html.parser')
  49. data = {element['name']: element.get('value', None) for element in soup.find_all('input')}
  50. data['__EVENTTARGET'] = "ctl00$ctl20$lbXML"
  51. data['ctl00$ctl20$txtDateStart'] = DATE_FROM.strftime("%d-%m-%Y")
  52. data['ctl00$ctl20$txtDateEnd'] = DATE_TO.strftime("%d-%m-%Y")
  53. # Download de XML
  54. r = s.post("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/", data=data)
  55. if r.status_code != 200:
  56. print(f"No data was available between {DATE_FROM} and {DATE_TO} (error code {r.status_code}).")
  57. return None
  58. r.encoding = 'utf-8'
  59. return ElementTree.fromstring(r.text)
  60. def extract_consumpties(consumpties):
  61. for consumptie in consumpties:
  62. product = consumptie.find('Product')
  63. product_guid = product.find('Guid').text
  64. product_naam = product.find('Naam').text
  65. nutrienten = consumptie.find('Nutrienten')
  66. product_nutrients = {snake_case(n.tag) + "_" + n.attrib['Eenheid']: float(n.attrib.get('WaardePer100Gram', 0))
  67. for n in nutrienten}
  68. product_dict= {'guid': product_guid,
  69. 'naam': product_naam}
  70. product_dict.update(product_nutrients)
  71. consumptie_eenheden = float(product.find('Eenheid').find('Aantal').text)
  72. consumptie_grampereenheid = float(product.find('Eenheid').attrib['GramPerEenheid'])
  73. consumptie_hoeveelheid = consumptie_eenheden * consumptie_grampereenheid
  74. consumptie_datum = date(*(int(consumptie.find('Datum').find(part).text) for part in ['Jaar', 'Maand', 'Dag']))
  75. consumptie_periode = consumptie.attrib['Periode']
  76. consumptie_dict = {'datum': consumptie_datum,
  77. 'product_guid': product_guid,
  78. 'periode': consumptie_periode,
  79. 'hoeveelheid': consumptie_hoeveelheid}
  80. yield product_dict, consumptie_dict
  81. def insert_product(cursor, guid, naam, **nutrients):
  82. cursor.execute("SELECT * FROM producten WHERE guid = %s", (guid,))
  83. if cursor.rowcount > 0:
  84. return
  85. nutrients_cols = ", ".join(nutrients.keys())
  86. placeholders = ", ".join(["%s"] * (len(nutrients.keys()) + 2))
  87. cursor.execute(f"INSERT INTO producten (guid, naam, {nutrients_cols}) VALUES ({placeholders})",
  88. (guid, naam, *nutrients.values()))
  89. def insert_consumptie(cursor, datum, product_guid, periode, hoeveelheid):
  90. cursor.execute(f"INSERT INTO consumpties_{DATABASE['user']} (datum, product_guid, periode, hoeveelheid_g) VALUES (%s, %s, %s, %s)",
  91. (datum, product_guid, periode, hoeveelheid))
  92. DATABASE = {'host': args.dbhost,
  93. 'user': args.dbuser,
  94. 'password': args.dbpassword,
  95. 'dbname': args.dbname,
  96. 'port': args.dbport}
  97. conn = psycopg2.connect(**DATABASE)
  98. cursor = conn.cursor()
  99. # Controleer van welke datum we voor het laatst gegevens hebben
  100. cursor.execute(f"SELECT MAX(datum) FROM consumpties_{args.dbuser}")
  101. if cursor.rowcount == 1:
  102. last_known_date = cursor.fetchone()[0]
  103. else:
  104. last_known_date = None
  105. # Bepaal de datum range waarbinnen we gaan zoeken
  106. if args.from_date:
  107. DATE_FROM = parse_date(args.from_date)
  108. elif last_known_date and last_known_date <= date.today():
  109. DATE_FROM = last_known_date
  110. else:
  111. DATE_FROM = date.today()
  112. DATE_TO = parse_date(args.to_date) if args.to_date else date.today()
  113. if DATE_TO < DATE_FROM:
  114. DATE_TO = DATE_FROM
  115. data = download_data()
  116. if not data:
  117. quit(0)
  118. conn = psycopg2.connect(**DATABASE)
  119. cursor = conn.cursor()
  120. cursor.execute(f"DELETE FROM consumpties_{args.dbuser} WHERE datum BETWEEN %s AND %s", (DATE_FROM, DATE_TO))
  121. for product, consumptie in extract_consumpties(data):
  122. insert_product(cursor, **product)
  123. insert_consumptie(cursor, **consumptie)
  124. conn.commit()
  125. conn.close()