Feed date scraper for Gemini (protocol)
date parsing: only attempt to check link description
| -rwxr-xr-x | test_zachwalk.py | 1 |
| -rwxr-xr-x | zachwalk.py | 7 |
2 files changed, 6 insertions, 2 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 952ad65..1a551db 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -6,6 +6,7 @@ def main():
     assert zachwalk.gnd([b'']) == zachwalk.DEFAULT
     assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT
     assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date()
+    assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date()
 
 if __name__ == '__main__':
     main()
diff --git a/zachwalk.py b/zachwalk.py
index 7965962..7c8680e 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -29,7 +29,7 @@ def gnd(fp):
         line=line.decode('UTF-8')
         if line.strip()[0:2] == '=>':
             try:
-                date = parse(line,fuzzy=True).date()
+                date = parse(getdesc(line),fuzzy=True).date()
                 # todo: read lots of these and compare them
                 return date
             except:
@@ -42,9 +42,12 @@ def main(argv):
         if line[0:2] == '=>':
             # plz don't use multiple spaces.
             url = line.split(' ')[1]
-            desc = line[3+len(url):].strip()
+            desc = getdesc(line)
             newestdate = getnewestdate(url)
             print(f'=> {url} {newestdate} - {desc}')
 
+def getdesc(line):
+    return ' '.join(line.split(' ')[2:])
+
 if __name__ == '__main__':
     main(sys.argv)