from argparse import ArgumentParser from datetime import date from getpass import getuser import re from xml.etree import ElementTree from bs4 import BeautifulSoup import requests import psycopg2 parser = ArgumentParser(description="Tool om data uit voedingscentrum.nl te importeren in PostgreSQL.") parser.add_argument('--email', type=str, required=True, help="Je emailadres voor voedingscentrum.nl") parser.add_argument('--password', type=str, required=True, help="Je wachtwoord voor voedingscentrum.nl") parser.add_argument('--dbhost', type=str, default='localhost', help="Je PostgreSQL hostname (standaard localhost)") parser.add_argument('--dbuser', type=str, default=getuser(), help=f"Je PostgreSQL username (standaard {getuser()})") parser.add_argument('--dbpassword', type=str, default=None, help="Je PostgreSQL password") parser.add_argument('--dbport', type=int, default=5432, help="Je PostgreSQL poort (standaard 5432)") parser.add_argument('--dbname', type=str, default='voeding', help="De naam van je PostgreSQL database (standaard voeding)") parser.add_argument('--from-date', type=str, help="Datum vanaf wanneer je gegevens wilt ophalen (standaard vandaag)") parser.add_argument('--to-date', type=str, help="Datum tot wanneer je gegevens wilt ophalen (standaard vandaag)") args = parser.parse_args() def parse_date(date_string): matches = re.match(r'(\d{4})-(\d{2})-(\d{2})', date_string) if matches: year, month, day = map(int, matches.groups()) return date(year, month, day) matches = re.match(r'(\d{1,2})-(\d{1,2})-(\d{4})', date_string) if matches: day, month, year = map(int, matches.groups()) return date(year, month, day) def snake_case(s): s = re.sub(r'([a-z])([A-Z])', r'\1_\2', s) return s.lower() def download_data(): # Laad de startpagina en haal de verborgen form-data op s = requests.Session() r = s.get("https://mijn.voedingscentrum.nl/nl/login/") soup = BeautifulSoup(r.text, 'html.parser') data = {element['name']: element.get('value', None) for element in soup.find_all('input')} # Voeg persoonlijke gegevens toe om in te loggen data['ctl00$ctl18$txtUser'] = args.email data['ctl00$ctl18$txtPassword'] = args.password data['__EVENTTARGET'] = 'ctl00$ctl18$lbLogin' data.pop('ctl00$ctl18$cbxRemember') # Log in s.post("https://mijn.voedingscentrum.nl/nl/login/", data=data) # Laad de overichtpagina en haal de verborgen form-data op r = s.get("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/") soup = BeautifulSoup(r.text, 'html.parser') data = {element['name']: element.get('value', None) for element in soup.find_all('input')} data['__EVENTTARGET'] = "ctl00$ctl20$lbXML" data['ctl00$ctl20$txtDateStart'] = DATE_FROM.strftime("%d-%m-%Y") data['ctl00$ctl20$txtDateEnd'] = DATE_TO.strftime("%d-%m-%Y") # Download de XML r = s.post("https://mijn.voedingscentrum.nl/nl/dashboard/eetmeter/overzicht/", data=data) r.encoding = 'utf-8' return ElementTree.fromstring(r.text) def extract_consumpties(consumpties): for consumptie in consumpties: product = consumptie.find('Product') product_guid = product.find('Guid').text product_naam = product.find('Naam').text nutrienten = consumptie.find('Nutrienten') product_nutrients = {snake_case(n.tag) + "_" + n.attrib['Eenheid']: float(n.attrib.get('WaardePer100Gram', 0)) for n in nutrienten} product_dict= {'guid': product_guid, 'naam': product_naam} product_dict.update(product_nutrients) consumptie_eenheden = float(product.find('Eenheid').find('Aantal').text) consumptie_grampereenheid = float(product.find('Eenheid').attrib['GramPerEenheid']) consumptie_hoeveelheid = consumptie_eenheden * consumptie_grampereenheid consumptie_datum = date(*(int(consumptie.find('Datum').find(part).text) for part in ['Jaar', 'Maand', 'Dag'])) consumptie_periode = consumptie.attrib['Periode'] consumptie_dict = {'datum': consumptie_datum, 'product_guid': product_guid, 'periode': consumptie_periode, 'hoeveelheid': consumptie_hoeveelheid} yield product_dict, consumptie_dict def insert_product(cursor, guid, naam, **nutrients): cursor.execute("SELECT * FROM producten WHERE guid = %s", (guid,)) if cursor.rowcount > 0: return nutrients_cols = ", ".join(nutrients.keys()) placeholders = ", ".join(["%s"] * (len(nutrients.keys()) + 2)) cursor.execute(f"INSERT INTO producten (guid, naam, {nutrients_cols}) VALUES ({placeholders})", (guid, naam, *nutrients.values())) def insert_consumptie(cursor, datum, product_guid, periode, hoeveelheid): cursor.execute(f"INSERT INTO consumpties_{DATABASE['user']} (datum, product_guid, periode, hoeveelheid_g) VALUES (%s, %s, %s, %s)", (datum, product_guid, periode, hoeveelheid)) DATABASE = {'host': args.dbhost, 'user': args.dbuser, 'password': args.dbpassword, 'dbname': args.dbname, 'port': args.dbport} DATE_FROM = parse_date(args.from_date) if args.from_date else date.today() DATE_TO = parse_date(args.to_date) if args.to_date else date.today() conn = psycopg2.connect(**DATABASE) cursor = conn.cursor() cursor.execute(f"DELETE FROM consumpties_{args.dbuser} WHERE datum BETWEEN %s AND %s", (DATE_FROM, DATE_TO)) data = download_data() for product, consumptie in extract_consumpties(data): insert_product(cursor, **product) insert_consumptie(cursor, **consumptie) conn.commit() conn.close()