Feed date scraper for Gemini (protocol)
* fix parsing link lines with tabs
| -rwxr-xr-x | test_zachwalk.py | 3 |
| -rwxr-xr-x | zachwalk.py | 3 |
2 files changed, 5 insertions, 1 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py
index 0576ac5..454dc56 100755
--- a/test_zachwalk.py
+++ b/test_zachwalk.py
@@ -12,6 +12,9 @@ def main():
     assert zachwalk.gnd(['=>2021-01-28.gmi 2021-01-28 - RE ew0k: Your Gemini Browser and Server are Probably Doing Certificates Wrong']) == parse('2021-01-28').date()
     assert zachwalk.gnd(['=> atom.xml Atom Feed']) == zachwalk.DEFAULT
     assert zachwalk.gnd(['=> geminitoepub.gmi 2021-02-27 Gemini to Epub']) == parse('2021-02-27').date()
+    assert zachwalk.gnd(['=> m5paper.gmi 2021-01-31 M5Paper']) == parse('2021-01-31').date()
+
+    assert zachwalk.getdesc('=> m5paper.gmi 2021-01-31 M5Paper') == '2021-01-31 M5Paper'
 
 if __name__ == '__main__':
     main()
diff --git a/zachwalk.py b/zachwalk.py
index b65df8c..145c174 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -60,6 +60,7 @@ def replaceDateIfNewer(desc, newestdate):
 def main():
     for line in fileinput.input(): #stdin or file from argv
         if line[0:2] == '=>':
+            # don't use tabs
             url = line[2:].strip().split(' ')[0]
             if isAbsGeminiUrl(url):
                 desc = getdesc(line)
@@ -74,7 +75,7 @@ def isAbsGeminiUrl(url):
     return url[0:9] == 'gemini://'
 
 def getdesc(line):
-    return ' '.join(line[2:].strip().split(' ')[1:])
+    return ' '.join(line[2:].strip().replace(' ',' ').split(' ')[1:]).lstrip()
 
 if __name__ == '__main__':
     main()