Feed date scraper for Gemini (protocol)
Diffstat (limited to 'zachwalk.py')
| -rwxr-xr-x | zachwalk.py | 32 |
1 file changed, 12 insertions, 20 deletions
diff --git a/zachwalk.py b/zachwalk.py index da03aaf..a77acad 100755 --- a/zachwalk.py +++ b/zachwalk.py @@ -2,7 +2,7 @@ """load links from a gemini file and output the latest date for each one""" __author__ = "Zach DeCook" __email__ = "zachdecook@librem.one" -__copyright__ = "Copyright (C) 2021 Zach DeCook" +__copyright__ = "Copyright (C) 2021-2022 Zach DeCook" __license__ = "AGPL" __version__ = "3" @@ -10,9 +10,8 @@ import sys import socket import ssl import fileinput -from dateutil.parser import parse -DEFAULT = parse('1970-01-01').date() +DEFAULT = "1970-01-01" def getnewestdate(url): """load the url, and find the newest date listed in a link""" @@ -39,28 +38,18 @@ def gnd(fp): line=line.decode('UTF-8') if line.strip()[0:2] == '=>': desc =getdesc(line) - try: - date = parse(desc[0:10],fuzzy=True).date() - if date > nd: - nd = date - except: - try: - date = parse(desc,fuzzy=True).date() - if date > nd: - nd = date - except: - pass - pass + date = desc[0:10] + # basic check if it is a date + if isDate(date) and date > nd: + nd = date return nd def replaceDateIfNewer(desc, newestdate): - try: - tup = parse(desc, fuzzy_with_tokens=True) - date = tup[0].date() - except: + date = desc[0:10] + if not isDate(date): return f'{newestdate} - {desc}' if newestdate > date: - return str(newestdate) + ' '.join(tup[1]) + return str(newestdate) + desc[10:] return desc def main(): @@ -80,6 +69,9 @@ def main(): def isAbsGeminiUrl(url): return url[0:9] == 'gemini://' +def isDate(date): + return date[0:4].isnumeric() and date[5:7].isnumeric() and date[8:].isnumeric() + def getdesc(line): return ' '.join(line[2:].strip().replace(' ',' ').split(' ')[1:]).lstrip() |