123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
import hashlib
import html
import os
import re
import urllib.request
from argparse import ArgumentParser
from datetime import datetime

from lxml import etree
# Shared page prologue written at the top of the index and of every article.
HEADER = """<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<title>RSS Feed</title>
<style>
* { font-family: sans-serif; line-height: 1.6em;}
a:link { color: black }
a:visited {color: #666 }
</style>
</head>
<body>"""

# GUIDs matching this pattern are used verbatim as file names. It excludes
# path separators, a leading dot ("." / ".." / hidden files) and overly long
# names, and only contains characters that need no escaping in a URL.
SAFE_BASENAME = re.compile(r"[A-Za-z0-9_-][A-Za-z0-9._-]{0,99}")


def article_basename(guid):
    """Return a file-system- and URL-safe base name for an article page.

    GUIDs that are already safe are returned unchanged, so existing article
    file names stay stable. Anything else (typically a URL used as GUID) is
    replaced by its SHA-256 hex digest, which prevents both failed writes and
    path traversal out of the ``articles`` directory.
    """
    if SAFE_BASENAME.fullmatch(guid):
        return guid
    return hashlib.sha256(guid.encode("utf-8")).hexdigest()


def render_index_entry(basename, title):
    """Return the ``<li>`` line linking the index to one article page.

    ``basename`` must come from :func:`article_basename`; ``title`` is plain
    text from the feed and is HTML-escaped here.
    """
    return f'<li><a href="articles/{basename}.html">{html.escape(title)}</a></li>'


def render_article(title, link, contents):
    """Return the complete HTML document for one article.

    ``title`` and ``link`` are escaped. ``contents`` is the item's
    ``<description>``, which carries the article's HTML markup and is
    therefore emitted as-is.
    """
    # NOTE(review): ``contents`` is untrusted HTML from the feed and is not
    # sanitized; only point this script at feeds you trust.
    safe_link = html.escape(link)
    lines = [
        HEADER,
        f'<p>Original article: <a href="{safe_link}">{safe_link}</a></p>',
        f"<h1>{html.escape(title)}</h1>",
        contents,
        "</body>",
        "</html>",
    ]
    return "\n".join(lines) + "\n"


def main():
    """Fetch the RSS feed given on the command line and write static pages.

    Command-line arguments:
        url:    address of the RSS feed to download.
        output: directory that receives ``index.html`` and ``articles/``.
    """
    parser = ArgumentParser()
    parser.add_argument("url", type=str)
    parser.add_argument("output", type=str)
    args = parser.parse_args()

    # Download and parse before touching the output directory, so a network
    # or parse failure does not truncate an existing index.html.
    with urllib.request.urlopen(args.url) as response:
        root = etree.parse(response).getroot()
    channel = root.find("channel")
    items = [] if channel is None else channel.findall("item")

    articles_dir = os.path.join(args.output, "articles")
    os.makedirs(articles_dir, exist_ok=True)  # also creates args.output

    # The pages declare charset=utf-8, so write them as UTF-8 regardless of
    # the platform's default encoding.
    index_path = os.path.join(args.output, "index.html")
    with open(index_path, "w", encoding="utf-8") as index:
        print(HEADER, file=index)
        print("<h1>RSS Feed</h1>", file=index)
        updated = datetime.now().strftime("%d-%m-%Y %H:%M uur")
        print(f"<p>Bijgewerkt: {updated}</p>", file=index)
        print("<ul>", file=index)

        for item in items:
            # findtext() tolerates missing or empty elements.
            title = item.findtext("title", default="")
            link = item.findtext("link", default="")
            contents = item.findtext("description", default="")
            # Fall back to the link when the item has no GUID.
            guid = item.findtext("guid", default="") or link

            basename = article_basename(guid)
            print(render_index_entry(basename, title), file=index)

            article_path = os.path.join(articles_dir, f"{basename}.html")
            with open(article_path, "w", encoding="utf-8") as article:
                article.write(render_article(title, link, contents))

        print("</ul>", file=index)
        print("</body>", file=index)
        print("</html>", file=index)


if __name__ == "__main__":
    main()
|