Feed date scraper for Gemini (protocol)
parsing: help out by adding space before colon
Zach DeCook 2021-02-07
parent 2bf26df · commit 6ae0102
-rwxr-xr-x test_zachwalk.py 1
-rwxr-xr-x zachwalk.py 4
2 files changed, 4 insertions, 1 deletion
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 1a551db..3cbf5b2 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -7,6 +7,7 @@ def main():
assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT
assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date()
assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date()
+ assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi September 21, 2020: TOFU recommendations for Gemini']) == parse('2020-09-21').date()
if __name__ == '__main__':
main()
diff --git a/zachwalk.py b/zachwalk.py
index 7c8680e..2c2c3ea 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -29,7 +29,9 @@ def gnd(fp):
line=line.decode('UTF-8')
if line.strip()[0:2] == '=>':
try:
- date = parse(getdesc(line),fuzzy=True).date()
+ desc =getdesc(line)
+ desc=desc.replace(':',' :') #quirk for parse on drew's capsule
+ date = parse(desc,fuzzy=True).date()
# todo: read lots of these and compare them
return date
except: