Feed date scraper for Gemini (protocol)
date parsing: Better accept standard subscribable pages spec
Zach DeCook 2022-04-26
parent 5e2cc3f · commit 17d4fa5
-rwxr-xr-x test_zachwalk.py 6
-rwxr-xr-x zachwalk.py 11
2 files changed, 11 insertions, 6 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 454dc56..3860752 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -7,12 +7,14 @@ def main():
assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT
assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date()
assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date()
- assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi September 21, 2020: TOFU recommendations for Gemini']) == parse('2020-09-21').date()
- assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi February 15, 2021: Status update, February 2021']) == parse('2021-02-15').date()
+ assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi 2020-09-21: TOFU recommendations for Gemini']) == parse('2020-09-21').date()
+ assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi 2021-02-15: Status update, February 2021']) == parse('2021-02-15').date()
assert zachwalk.gnd(['=>2021-01-28.gmi 2021-01-28 - RE ew0k: Your Gemini Browser and Server are Probably Doing Certificates Wrong']) == parse('2021-01-28').date()
assert zachwalk.gnd(['=> atom.xml Atom Feed']) == zachwalk.DEFAULT
assert zachwalk.gnd(['=> geminitoepub.gmi 2021-02-27 Gemini to Epub']) == parse('2021-02-27').date()
assert zachwalk.gnd(['=> m5paper.gmi 2021-01-31 M5Paper']) == parse('2021-01-31').date()
+ assert zachwalk.gnd(['=> gemini://fossphones.com/03-29-22.gmi 2022-03-29 Linux Phone News - March 29, 2022']) == parse('2022-03-29').date()
+
assert zachwalk.getdesc('=> m5paper.gmi 2021-01-31 M5Paper') == '2021-01-31 M5Paper'
diff --git a/zachwalk.py b/zachwalk.py
index 145c174..1588449 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -37,13 +37,16 @@ def gnd(fp):
if type(line) != str:
line=line.decode('UTF-8')
if line.strip()[0:2] == '=>':
+ desc =getdesc(line)
try:
- desc =getdesc(line)
- desc=desc.split(':')[0] #this should only have 1 'datey' part
- date = parse(desc,fuzzy=True).date()
- # todo: read lots of these and compare them
+ date = parse(desc[0:10],fuzzy=True).date()
return date
except:
+ try:
+ date = parse(desc,fuzzy=True).date()
+ return date
+ except:
+ pass
pass
return DEFAULT