voedingscentrum.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. from argparse import ArgumentParser
  2. from datetime import date
  3. from getpass import getuser
  4. import re
  5. from xml.etree import ElementTree
  6. from bs4 import BeautifulSoup
  7. import requests
  8. import psycopg2
  9. parser = ArgumentParser(description="Tool om data uit voedingscentrum.nl te importeren in PostgreSQL.")
  10. parser.add_argument('--email', type=str, required=True, help="Je emailadres voor voedingscentrum.nl")
  11. parser.add_argument('--password', type=str, required=True, help="Je wachtwoord voor voedingscentrum.nl")
  12. parser.add_argument('--dbhost', type=str, default='localhost', help="Je PostgreSQL hostname (standaard localhost)")
  13. parser.add_argument('--dbuser', type=str, default=getuser(), help=f"Je PostgreSQL username (standaard {getuser()})")
  14. parser.add_argument('--dbpassword', type=str, default=None, help="Je PostgreSQL password")
  15. parser.add_argument('--dbport', type=int, default=5432, help="Je PostgreSQL poort (standaard 5432)")
  16. parser.add_argument('--dbname', type=str, default='voeding', help="De naam van je PostgreSQL database (standaard voeding)")
  17. parser.add_argument('--from-date', type=str, help="Datum vanaf wanneer je gegevens wilt ophalen (standaard vandaag)")
  18. parser.add_argument('--to-date', type=str, help="Datum tot wanneer je gegevens wilt ophalen (standaard vandaag)")
  19. args = parser.parse_args()
  20. def parse_date(date_string):
  21. matches = re.match(r'(\d{4})-(\d{2})-(\d{2})', date_string)
  22. if matches:
  23. year, month, day = map(int, matches.groups())
  24. return date(year, month, day)
  25. matches = re.match(r'(\d{1,2})-(\d{1,2})-(\d{4})', date_string)
  26. if matches:
  27. day, month, year = map(int, matches.groups())
  28. return date(year, month, day)
  29. def snake_case(s):
  30. s = re.sub(r'([a-z])([A-Z])', r'\1_\2', s)
  31. return s.lower()
  32. def download_data():
  33. # Laad de startpagina en haal de verborgen form-data op
  34. s = requests.Session()
  35. r = s.get("https://mijn.voedingscentrum.nl/nl/login/")
  36. soup = BeautifulSoup(r.text, 'html.parser')
  37. data = {element['name']: element.get('value', None) for element in soup.find_all('input')}
  38. # Voeg persoonlijke gegevens toe om in te loggen
  39. data['ctl00$ctl18$txtUser'] = args.email
  40. data['ctl00$ctl18$txtPassword'] = args.password
  41. data['__EVENTTARGET'] = 'ctl00$ctl18$lbLogin'
  42. data.pop('ctl00$ctl18$cbxRemember')
  43. # Log in
  44. s.post("https://mijn.voedingscentrum.nl/nl/login/", data=data)
  45. # Laad de overichtpagina en haal de verborgen form-data op
  46. r = s.get("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/")
  47. soup = BeautifulSoup(r.text, 'html.parser')
  48. data = {element['name']: element.get('value', None) for element in soup.find_all('input')}
  49. data['__EVENTTARGET'] = "ctl00$ctl20$lbXML"
  50. data['ctl00$ctl20$txtDateStart'] = DATE_FROM.strftime("%d-%m-%Y")
  51. data['ctl00$ctl20$txtDateEnd'] = DATE_TO.strftime("%d-%m-%Y")
  52. # Download de XML
  53. r = s.post("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/", data=data)
  54. r.encoding = 'utf-8'
  55. return ElementTree.fromstring(r.text)
  56. def extract_consumpties(consumpties):
  57. for consumptie in consumpties:
  58. product = consumptie.find('Product')
  59. product_guid = product.find('Guid').text
  60. product_naam = product.find('Naam').text
  61. nutrienten = consumptie.find('Nutrienten')
  62. product_nutrients = {snake_case(n.tag) + "_" + n.attrib['Eenheid']: float(n.attrib.get('WaardePer100Gram', 0))
  63. for n in nutrienten}
  64. product_dict= {'guid': product_guid,
  65. 'naam': product_naam}
  66. product_dict.update(product_nutrients)
  67. consumptie_eenheden = float(product.find('Eenheid').find('Aantal').text)
  68. consumptie_grampereenheid = float(product.find('Eenheid').attrib['GramPerEenheid'])
  69. consumptie_hoeveelheid = consumptie_eenheden * consumptie_grampereenheid
  70. consumptie_datum = date(*(int(consumptie.find('Datum').find(part).text) for part in ['Jaar', 'Maand', 'Dag']))
  71. consumptie_periode = consumptie.attrib['Periode']
  72. consumptie_dict = {'datum': consumptie_datum,
  73. 'product_guid': product_guid,
  74. 'periode': consumptie_periode,
  75. 'hoeveelheid': consumptie_hoeveelheid}
  76. yield product_dict, consumptie_dict
  77. def insert_product(cursor, guid, naam, **nutrients):
  78. cursor.execute("SELECT * FROM producten WHERE guid = %s", (guid,))
  79. if cursor.rowcount > 0:
  80. return
  81. nutrients_cols = ", ".join(nutrients.keys())
  82. placeholders = ", ".join(["%s"] * (len(nutrients.keys()) + 2))
  83. cursor.execute(f"INSERT INTO producten (guid, naam, {nutrients_cols}) VALUES ({placeholders})",
  84. (guid, naam, *nutrients.values()))
  85. def insert_consumptie(cursor, datum, product_guid, periode, hoeveelheid):
  86. cursor.execute(f"INSERT INTO consumpties_{DATABASE['user']} (datum, product_guid, periode, hoeveelheid_g) VALUES (%s, %s, %s, %s)",
  87. (datum, product_guid, periode, hoeveelheid))
  88. DATABASE = {'host': args.dbhost,
  89. 'user': args.dbuser,
  90. 'password': args.dbpassword,
  91. 'dbname': args.dbname,
  92. 'port': args.dbport}
  93. DATE_FROM = parse_date(args.from_date) if args.from_date else date.today()
  94. DATE_TO = parse_date(args.to_date) if args.to_date else date.today()
  95. conn = psycopg2.connect(**DATABASE)
  96. cursor = conn.cursor()
  97. cursor.execute(f"DELETE FROM consumpties_{args.dbuser} WHERE datum BETWEEN %s AND %s", (DATE_FROM, DATE_TO))
  98. data = download_data()
  99. for product, consumptie in extract_consumpties(data):
  100. insert_product(cursor, **product)
  101. insert_consumptie(cursor, **consumptie)
  102. conn.commit()
  103. conn.close()