Feed date scraper for Gemini (protocol)
date: Check all link lines
(rather than just the first one with a date)
Zach DeCook 2022-04-26
parent 17d4fa5 · commit 42a9c82
-rwxr-xr-x test_zachwalk.py 2
-rwxr-xr-x zachwalk.py 9
2 files changed, 7 insertions, 4 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 3860752..6a3b579 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -14,7 +14,7 @@ def main():
assert zachwalk.gnd(['=> geminitoepub.gmi 2021-02-27 Gemini to Epub']) == parse('2021-02-27').date()
assert zachwalk.gnd(['=> m5paper.gmi 2021-01-31 M5Paper']) == parse('2021-01-31').date()
assert zachwalk.gnd(['=> gemini://fossphones.com/03-29-22.gmi 2022-03-29 Linux Phone News - March 29, 2022']) == parse('2022-03-29').date()
-
+ assert zachwalk.gnd(['=> pizza.gmi 1999-01-01 yum', '=> crepes.gmi 2099-01-01 cool']) == parse('2099-01-01').date()
assert zachwalk.getdesc('=> m5paper.gmi 2021-01-31 M5Paper') == '2021-01-31 M5Paper'
diff --git a/zachwalk.py b/zachwalk.py
index 1588449..da03aaf 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -33,6 +33,7 @@ def getnewestdate(url):
# TODO: something special if status is not 2x
return gnd(fp)
def gnd(fp):
+ nd = DEFAULT
for line in fp:
if type(line) != str:
line=line.decode('UTF-8')
@@ -40,15 +41,17 @@ def gnd(fp):
desc =getdesc(line)
try:
date = parse(desc[0:10],fuzzy=True).date()
- return date
+ if date > nd:
+ nd = date
except:
try:
date = parse(desc,fuzzy=True).date()
- return date
+ if date > nd:
+ nd = date
except:
pass
pass
- return DEFAULT
+ return nd
def replaceDateIfNewer(desc, newestdate):
try: