Feed date scraper for Gemini (protocol)
date parsing: better support for the standard subscribable pages spec
| -rwxr-xr-x | test_zachwalk.py | 6 |
| -rwxr-xr-x | zachwalk.py | 11 |
2 files changed, 11 insertions, 6 deletions
diff --git a/test_zachwalk.py b/test_zachwalk.py index 454dc56..3860752 100755 --- a/test_zachwalk.py +++ b/test_zachwalk.py @@ -7,12 +7,14 @@ def main(): assert zachwalk.gnd([b'2021-01-31']) == zachwalk.DEFAULT assert zachwalk.gnd([b'=> path.gmi 2021-01-31 - my post']) == parse('2021-01-31').date() assert zachwalk.gnd([b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!']) == parse('2020-11-25').date() - assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi September 21, 2020: TOFU recommendations for Gemini']) == parse('2020-09-21').date() - assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi February 15, 2021: Status update, February 2021']) == parse('2021-02-15').date() + assert zachwalk.gnd([b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi 2020-09-21: TOFU recommendations for Gemini']) == parse('2020-09-21').date() + assert zachwalk.gnd(['=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi 2021-02-15: Status update, February 2021']) == parse('2021-02-15').date() assert zachwalk.gnd(['=>2021-01-28.gmi 2021-01-28 - RE ew0k: Your Gemini Browser and Server are Probably Doing Certificates Wrong']) == parse('2021-01-28').date() assert zachwalk.gnd(['=> atom.xml Atom Feed']) == zachwalk.DEFAULT assert zachwalk.gnd(['=> geminitoepub.gmi 2021-02-27 Gemini to Epub']) == parse('2021-02-27').date() assert zachwalk.gnd(['=> m5paper.gmi 2021-01-31 M5Paper']) == parse('2021-01-31').date() + assert zachwalk.gnd(['=> gemini://fossphones.com/03-29-22.gmi 2022-03-29 Linux Phone News - March 29, 2022']) == parse('2022-03-29').date() + assert zachwalk.getdesc('=> m5paper.gmi 2021-01-31 M5Paper') == '2021-01-31 M5Paper' diff --git a/zachwalk.py b/zachwalk.py index 145c174..1588449 100755 --- a/zachwalk.py +++ b/zachwalk.py @@ -37,13 +37,16 @@ def gnd(fp): if type(line) != str: line=line.decode('UTF-8') if line.strip()[0:2] == '=>': + desc =getdesc(line) try: - desc =getdesc(line) - 
desc=desc.split(':')[0] #this should only have 1 'datey' part - date = parse(desc,fuzzy=True).date() - # todo: read lots of these and compare them + date = parse(desc[0:10],fuzzy=True).date() return date except: + try: + date = parse(desc,fuzzy=True).date() + return date + except: + pass pass return DEFAULT |