Feed date scraper for Gemini (protocol)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#!/usr/bin/env python3
import zachwalk
from dateutil.parser import parse

def main():
    """Check ``zachwalk.gnd`` against a table of sample input lines.

    Every sample is handed to ``zachwalk.gnd`` wrapped in a one-element
    list. A sample paired with ``None`` must produce ``zachwalk.DEFAULT``;
    a sample paired with a date string must produce that date, obtained via
    ``dateutil.parser.parse(...).date()``. The first mismatch raises
    ``AssertionError``.
    """
    # (input line, expected date text); None means zachwalk.DEFAULT is expected.
    # The table mixes bytes and str inputs, and the order matches the order in
    # which the checks are meant to run.
    samples = (
        (b'', None),
        (b'2021-01-31', None),
        (b'=> path.gmi 2021-01-31 - my post', '2021-01-31'),
        (b'=> 2020/11/25/hello-gemini.gmi 2020-11-25 - Hello, Gemini!', '2020-11-25'),
        (b'=> gemini://drewdevault.com/2020/09/21/Gemini-TOFU.gmi September 21, 2020: TOFU recommendations for Gemini', '2020-09-21'),
        ('=> gemini://drewdevault.com/2021/02/15/Status-update-February-2021.gmi February 15, 2021: Status update, February 2021', '2021-02-15'),
        ('=>2021-01-28.gmi 2021-01-28 - RE ew0k: Your Gemini Browser and Server are Probably Doing Certificates Wrong', '2021-01-28'),
        ('=> atom.xml     Atom Feed', None),
        ('=> geminitoepub.gmi     2021-02-27 Gemini to Epub', '2021-02-27'),
    )

    for line, date_text in samples:
        # Call gnd first, then build the expected value, as a direct
        # `assert gnd(...) == expected` would.
        found = zachwalk.gnd([line])
        if date_text is None:
            wanted = zachwalk.DEFAULT
        else:
            wanted = parse(date_text).date()
        assert found == wanted

# Run the assertions only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()