#!/usr/bin/env python3
"""Load links from a gemini file and output the latest date for each one.

Input lines (stdin or files named on the command line) are echoed back.
Link lines (``=> URL description``) have their URL fetched over the Gemini
protocol and the date in their description refreshed when the fetched page
advertises a newer one.
"""

import fileinput
import socket
import ssl
import sys
from urllib.parse import urlsplit

from dateutil.parser import parse

# Sentinel date returned whenever no date could be determined.
DEFAULT = parse('1970-01-01').date()

# Port used when the URL does not name one explicitly.
_DEFAULT_PORT = 1965

# Seconds to wait for the connection and for each subsequent socket read.
_TIMEOUT = 2

# Exceptions dateutil documents for input it cannot turn into a date
# (its ParserError is a ValueError subclass).
_PARSE_ERRORS = (ValueError, OverflowError)


def getnewestdate(url):
    """Fetch *url* over Gemini and return the date found in its links.

    The page body is handed to :func:`gnd`, so the result is the date of the
    first link line whose description contains a parseable date.

    Returns ``DEFAULT`` when the URL has no host, when connecting, the TLS
    handshake, sending or reading fails or times out, when the response
    status does not start with ``2``, or when no link carries a date.
    """
    # TODO: outsource to pre-installed cli program?
    try:
        parts = urlsplit(url)
        hostname = parts.hostname
        port = parts.port or _DEFAULT_PORT
    except ValueError:
        # Malformed netloc (bad IPv6 literal, non-numeric port, ...).
        return DEFAULT
    if not hostname:
        return DEFAULT

    # NOTE: certificate verification is switched off, so the peer is not
    # authenticated. check_hostname must be cleared before verify_mode can
    # be set to CERT_NONE.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    try:
        with socket.create_connection((hostname, port), timeout=_TIMEOUT) as raw:
            with context.wrap_socket(raw, server_hostname=hostname) as tls:
                tls.sendall((url + '\r\n').encode('UTF-8'))
                with tls.makefile('rb') as fp:
                    header = fp.readline().decode('UTF-8', errors='replace')
                    # Only a 2x status is followed by a body worth scanning.
                    if not header.strip().startswith('2'):
                        return DEFAULT
                    return gnd(fp)
    except (OSError, ValueError):
        # OSError covers refused connections, timeouts and TLS errors;
        # ValueError covers host names / URLs that cannot be encoded.
        return DEFAULT


def gnd(fp):
    """Return the date of the first dated link line in *fp*.

    *fp* is any iterable of lines, given as ``str`` or as UTF-8 ``bytes``.
    Lines that are not links, and links whose description holds no parseable
    date, are skipped. Returns ``DEFAULT`` when nothing matches.
    """
    for line in fp:
        if not isinstance(line, str):
            # Undecodable bytes must not abort the scan of the whole page.
            line = line.decode('UTF-8', errors='replace')
        if not line.lstrip().startswith('=>'):
            continue
        # quirk for parse on drew's capsule
        desc = getdesc(line).replace(':', ' :')
        try:
            # todo: read lots of these and compare them
            return parse(desc, fuzzy=True).date()
        except _PARSE_ERRORS:
            continue
    return DEFAULT


def replaceDateIfNewer(desc, newestdate):
    """Return *desc* with its date replaced by *newestdate* when that is newer.

    * If *desc* contains no parseable date, ``'<newestdate> - <desc>'`` is
      returned.
    * If *newestdate* is later than the date found in *desc*, the result is
      *newestdate* followed by the non-date fragments of *desc*.
    * Otherwise *desc* is returned unchanged.
    """
    try:
        parsed, leftovers = parse(desc, fuzzy_with_tokens=True)
    except _PARSE_ERRORS:
        return f'{newestdate} - {desc}'
    if newestdate > parsed.date():
        # leftovers are the text fragments the parser skipped around the date.
        return str(newestdate) + ' '.join(leftovers)
    return desc


def main():
    """Rewrite link lines from stdin / argv files with refreshed dates."""
    for line in fileinput.input():  # stdin or file from argv
        if not line.startswith('=>'):
            print(line.rstrip())
            continue
        url, desc = _splitlink(line)
        if not url:
            # A bare '=>' has nothing to fetch; pass it through untouched.
            print(line.rstrip())
            continue
        newestdate = getnewestdate(url)
        desc = replaceDateIfNewer(desc, newestdate)
        print(f'=> {url} {desc}')


def _splitlink(line):
    """Split a link line into ``(url, description)``.

    Expects *line* to be a link line (``=>`` marker first, ignoring
    surrounding whitespace). Any run of whitespace separates the marker, the
    URL and the description; missing parts come back as empty strings.
    """
    fields = line.strip()[2:].split(maxsplit=1)
    url = fields[0] if fields else ''
    desc = fields[1].strip() if len(fields) > 1 else ''
    return url, desc


def getdesc(line):
    """Return the description part of a link line ('' when there is none)."""
    return _splitlink(line)[1]


if __name__ == '__main__':
    main()