-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathurlutils.py
84 lines (73 loc) · 1.96 KB
/
urlutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python3
"""
Utility URL methods
"""
import hashlib
import sys
import errno
import time
import yaml
import json
from os import environ
from os.path import getmtime, join
from urllib.request import urlopen, Request
from urllib.error import HTTPError
# Maximum age of a cache file in seconds, overridable through the CACHE_AGE
# environment variable.  The default of 600 seconds is 10 minutes.
MAXAGE = int(environ.get('CACHE_AGE', '600'))
# Directory in which cached downloads are stored; caching is skipped when the
# CACHE environment variable is unset or empty.
CACHE = environ.get('CACHE')
# Any non-empty CACHE_DEBUG value enables progress messages on stderr.
DEBUG = environ.get('CACHE_DEBUG')
def isFileStale(filename):
    """Report whether *filename* is missing or older than MAXAGE seconds.

    MAXAGE is read from the CACHE_AGE environment variable and defaults to
    600 seconds (10 minutes).

    Returns True when the file does not exist or when its modification time
    lies more than MAXAGE seconds in the past; otherwise returns False.
    Any OSError other than "no such file" (errno.ENOENT) is re-raised.
    """
    try:
        modified = getmtime(filename)
    except OSError as err:
        if err.errno != errno.ENOENT:
            # Anything but a missing file is a genuine error, not staleness;
            # a bare raise keeps the original traceback intact.
            raise
        return True
    return time.time() - modified > MAXAGE
def hashurl(url):
    """Return the hexadecimal SHA-224 digest of *url* (encoded with str.encode())."""
    digest = hashlib.sha224()
    digest.update(url.encode())
    return digest.hexdigest()
def geturl(url):
    """Fetch *url* and return the response body as bytes -- no caching.

    The response object is closed before returning, so the underlying
    connection or file handle is not left open.  Exceptions raised by
    urlopen() propagate to the caller.
    """
    req = Request(url)
    # The context manager closes the response even if read() fails.
    with urlopen(req) as resp:
        return resp.read()
def urlexists(url):
    """Report whether *url* can be opened.

    Returns True when urlopen() succeeds and False when it raises HTTPError.
    Any other exception raised by urlopen() propagates to the caller.
    """
    req = Request(url)
    try:
        # Only success matters here; close the response immediately so the
        # connection is not left open.
        with urlopen(req):
            return True
    except HTTPError:
        return False
def urlcache(url):
    """Return the contents of *url* as bytes -- optional caching.

    When the module-level CACHE directory is not set, the URL is fetched
    directly with geturl().  Otherwise the body is stored in
    ``<CACHE>/<hashurl(url)>.tmp`` and re-used until isFileStale() reports
    that file as missing or too old, at which point it is downloaded again.

    The result is bytes on every path (direct fetch, cache miss and cache
    hit).  When DEBUG is set, a one-line progress message is written to
    stderr.
    """
    if not CACHE:
        if DEBUG:
            print("Fetching %s" % url, file=sys.stderr)
        return geturl(url)

    # The cache file name is derived from a hash of the full URL.
    cache = join(CACHE, hashurl(url) + ".tmp")

    if isFileStale(cache):
        if DEBUG:
            print("Caching %s" % url, file=sys.stderr)
        data = geturl(url)
        with open(cache, 'wb') as w:
            w.write(data)
        return data

    if DEBUG:
        print("Using cache for %s" % url, file=sys.stderr)
    # Read back in binary mode: the file was written as bytes, and text mode
    # would return str decoded with the locale encoding and with newlines
    # translated, making cache hits differ from cache misses.
    with open(cache, 'rb') as r:
        return r.read()
def loadyaml(url):
    """Fetch *url* via urlcache() and parse the result with yaml.safe_load()."""
    document = urlcache(url)
    return yaml.safe_load(document)
def loadjson(url):
    """Fetch *url* via urlcache() and parse the result with json.loads()."""
    payload = urlcache(url)
    return json.loads(payload)
# Test code: for every URL given on the command line, echo the URL and then
# print the YAML document loaded from it.
if __name__ == '__main__':
    locations = sys.argv[1:]
    for location in locations:
        print(location)
        print(loadyaml(location))