Feed date scraper for Gemini (protocol)
date: Check all link lines
(rather than just the first one with a date)
| -rwxr-xr-x | test_zachwalk.py | 2 |
| -rwxr-xr-x | zachwalk.py | 9 |
2 files changed, 7 insertions, 4 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 3860752..6a3b579 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -14,7 +14,7 @@ def main():
     assert zachwalk.gnd(['=> geminitoepub.gmi 2021-02-27 Gemini to Epub']) == parse('2021-02-27').date()
     assert zachwalk.gnd(['=> m5paper.gmi 2021-01-31 M5Paper']) == parse('2021-01-31').date()
     assert zachwalk.gnd(['=> gemini://fossphones.com/03-29-22.gmi 2022-03-29 Linux Phone News - March 29, 2022']) == parse('2022-03-29').date()
-
+    assert zachwalk.gnd(['=> pizza.gmi 1999-01-01 yum', '=> crepes.gmi 2099-01-01 cool']) == parse('2099-01-01').date()
     assert zachwalk.getdesc('=> m5paper.gmi 2021-01-31 M5Paper') == '2021-01-31 M5Paper'
diff --git a/zachwalk.py b/zachwalk.py
index 1588449..da03aaf 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -33,6 +33,7 @@ def getnewestdate(url):
     # TODO: something special if status is not 2x
     return gnd(fp)
 def gnd(fp):
+    nd = DEFAULT
     for line in fp:
         if type(line) != str:
             line=line.decode('UTF-8')
@@ -40,15 +41,17 @@ def gnd(fp):
             desc =getdesc(line)
             try:
                 date = parse(desc[0:10],fuzzy=True).date()
-                return date
+                if date > nd:
+                    nd = date
             except:
                 try:
                     date = parse(desc,fuzzy=True).date()
-                    return date
+                    if date > nd:
+                        nd = date
                 except:
                     pass
                 pass
-    return DEFAULT
+    return nd
 def replaceDateIfNewer(desc, newestdate):
     try: