Feed date scraper for Gemini (protocol)
date parsing: only attempt to check link description
Zach DeCook 2021-02-07
parent 5a0ebe1 · commit 2bf26df
-rwxr-xr-x test_zachwalk.py 1
-rwxr-xr-x zachwalk.py 7
2 files changed, 6 insertions, 2 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 952ad65..1a551db 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -6,6 +6,7 @@ def main():
assert zachwalk.gnd([b'']) == zachwalk.DEFAULT
assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT
assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date()
+ assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date()
if __name__ == '__main__':
main()
diff --git a/zachwalk.py b/zachwalk.py
index 7965962..7c8680e 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -29,7 +29,7 @@ def gnd(fp):
line=line.decode('UTF-8')
if line.strip()[0:2] == '=>':
try:
- date = parse(line,fuzzy=True).date()
+ date = parse(getdesc(line),fuzzy=True).date()
# todo: read lots of these and compare them
return date
except:
@@ -42,9 +42,12 @@ def main(argv):
if line[0:2] == '=>':
# plz don't use multiple spaces.
url = line.split(' ')[1]
- desc = line[3+len(url):].strip()
+ desc = getdesc(line)
newestdate = getnewestdate(url)
print(f'=> {url} {newestdate} - {desc}')
+def getdesc(line):
+ return ' '.join(line.split(' ')[2:])
+
if __name__ == '__main__':
main(sys.argv)