Feed date scraper for Gemini (protocol)
Diffstat (limited to 'zachwalk.py')
-rwxr-xr-x zachwalk.py 32
1 files changed, 12 insertions, 20 deletions
diff --git a/zachwalk.py b/zachwalk.py
index da03aaf..a77acad 100755
--- a/zachwalk.py
+++ b/zachwalk.py
@@ -2,7 +2,7 @@
"""load links from a gemini file and output the latest date for each one"""
__author__ = "Zach DeCook"
__email__ = "zachdecook@librem.one"
-__copyright__ = "Copyright (C) 2021 Zach DeCook"
+__copyright__ = "Copyright (C) 2021-2022 Zach DeCook"
__license__ = "AGPL"
__version__ = "3"
@@ -10,9 +10,8 @@ import sys
import socket
import ssl
import fileinput
-from dateutil.parser import parse
-DEFAULT = parse('1970-01-01').date()
+DEFAULT = "1970-01-01"
def getnewestdate(url):
"""load the url, and find the newest date listed in a link"""
@@ -39,28 +38,18 @@ def gnd(fp):
line=line.decode('UTF-8')
if line.strip()[0:2] == '=>':
desc =getdesc(line)
- try:
- date = parse(desc[0:10],fuzzy=True).date()
- if date > nd:
- nd = date
- except:
- try:
- date = parse(desc,fuzzy=True).date()
- if date > nd:
- nd = date
- except:
- pass
- pass
+ date = desc[0:10]
+ # basic check if it is a date
+ if isDate(date) and date > nd:
+ nd = date
return nd
def replaceDateIfNewer(desc, newestdate):
- try:
- tup = parse(desc, fuzzy_with_tokens=True)
- date = tup[0].date()
- except:
+ date = desc[0:10]
+ if not isDate(date):
return f'{newestdate} - {desc}'
if newestdate > date:
- return str(newestdate) + ' '.join(tup[1])
+ return str(newestdate) + desc[10:]
return desc
def main():
@@ -80,6 +69,9 @@ def main():
def isAbsGeminiUrl(url):
return url[0:9] == 'gemini://'
+def isDate(date):
+ return date[0:4].isnumeric() and date[5:7].isnumeric() and date[8:].isnumeric()
+
def getdesc(line):
return ' '.join(line[2:].strip().replace(' ',' ').split(' ')[1:]).lstrip()