#!/usr/bin/python
# geocoder.py -- forked from danvk/oldnyc.
#
# Run addresses or cross-streets through the Google Maps geocoder.
#
# Maintains a cache of previously-geocoded locations and throttles traffic to the Geocoder.
import base64
import os
import re
import sys
import time
import json
import urllib
# Geocoding endpoint; the %s slot is filled with the URL-quoted address.
GeocodeUrlTemplate = 'https://maps.googleapis.com/maps/api/geocode/json?sensor=false&address=%s'
# Directory (relative to the working directory) holding one cached response
# file per location.
CacheDir = "geocache"
# When true, each cache lookup writes the cache file path to stderr.
CacheDebug = False
# CacheDebug = True
# For lat/lon requests, where we can skip the geocoder.
# The two %s slots are filled with the latitude and longitude, in that order.
FakeResponse = """
{ "results" : [ {
"geometry" : { "location" : { "lat" : %s, "lng" : %s } },
"types" : [ "point_of_interest" ]
} ], "status" : "OK" }
"""
def _cache_file(loc):
    """Returns the path of the cache file for a location string.

    The file name is the base64 encoding of loc with its '=' padding removed
    and '/' replaced by '-', truncated to 255 characters, inside CacheDir.

    NOTE: earlier versions always dropped the last two characters of the
    encoding. Cache files written under that scheme for locations whose
    length is not 1 (mod 3) have different names and will not be found.
    """
    if not isinstance(loc, bytes):
        # Text (unicode) input: base64 works on bytes, so encode it first.
        loc = loc.encode('utf-8')
    key = base64.b64encode(loc)
    if not isinstance(key, str):
        # b64encode returned bytes rather than the native string type.
        key = key.decode('ascii')
    # Strip only the padding. The encoding ends in zero, one or two '='
    # depending on len(loc) % 3, so unconditionally dropping two characters
    # discards real data and lets distinct locations share one cache file.
    key = key.rstrip('=')
    key = key.replace('/', '-')  # '/' is bad in a file name.
    key = key[:255]  # longest possible filename
    return "%s/%s" % (CacheDir, key)
class Geocoder:
    """Geocodes addresses through the Google Maps API with an on-disk cache.

    Raw responses are cached one file per location, and network fetches are
    spaced at least `wait_time` seconds apart.
    """

    def __init__(self, network_allowed, wait_time):
        """Creates a geocoder.

        Args:
          network_allowed: if false, Locate() only consults the cache and the
              "@lat,lon" shortcut and never goes to the network.
          wait_time: minimum number of seconds between network fetches.
        """
        self._network_allowed = network_allowed
        self._wait_time = wait_time
        self._last_fetch = 0  # time.time() at the start of the last fetch

    def _check_cache(self, loc):
        """Returns cached results for the location or None if not available."""
        cache_file = _cache_file(loc)
        if CacheDebug:
            sys.stderr.write('Checking %s\n' % cache_file)
        try:
            with open(cache_file) as f:
                return f.read()
        except (IOError, OSError):
            # A missing or unreadable cache file is simply a cache miss.
            return None

    def _cache_result(self, loc, result):
        """Writes the raw response text for the location to its cache file."""
        with open(_cache_file(loc), "w") as f:
            f.write(result)

    def _fetch(self, url):
        """Fetches the URL with rate throttling. Returns the response body.

        Sleeps as needed so that consecutive fetches start at least
        self._wait_time seconds apart.
        """
        now = time.time()
        diff = now - self._last_fetch
        sys.stderr.write("now=%f, then=%f, diff=%f vs. %f\n" % (
            now, self._last_fetch, diff, self._wait_time))
        if diff < self._wait_time:
            time.sleep(self._wait_time - diff)
        self._last_fetch = time.time()

        sys.stderr.write("Fetching %s\n" % url)
        f = urllib.URLopener().open(url)
        try:
            return f.read()
        finally:
            # Release the connection even if the read fails.
            f.close()

    def _check_for_lat_lon(self, address):
        """For addresses of the form "@(lat),(lon)", skip the geocoder.

        Returns a synthesized response string built from FakeResponse, or
        None if the address is not of that form.
        """
        m = re.match(r'@([-0-9.]+),([-0-9.]+)$', address)
        if m:
            return FakeResponse % (m.group(1), m.group(2))
        return None

    def Locate(self, address, check_cache=True):
        """Returns a maps API JSON response for the address or None.

        Address should be a fully-qualified address, e.g.
        '111 8th Avenue, New York, NY'.

        The cache is consulted first (unless check_cache is false), then the
        "@lat,lon" shortcut, then the network if it is allowed. Responses
        with status OK or ZERO_RESULTS that did not come from the cache are
        written to it.

        Returns:
          The parsed response, or None if nothing could be obtained or the
          response carried an error status.

        Raises:
          Exception: if the response status is OVER_QUERY_LIMIT.
        """
        data = None
        from_cache = False
        if check_cache:
            data = self._check_cache(address)
            # An empty cache file counts as a miss, so that the result
            # obtained below replaces it.
            from_cache = bool(data)
        if not data:
            data = self._check_for_lat_lon(address)
        if not data:
            if not self._network_allowed:
                return None
            # Build the request URL only when a fetch actually happens.
            data = self._fetch(GeocodeUrlTemplate % urllib.quote(address))
        if not data:
            return None

        response = json.loads(data)
        status = response['status']
        if status not in ('OK', 'ZERO_RESULTS'):
            sys.stderr.write('Error status %s %s\n' % (status, json.dumps(response)))
            if status == 'OVER_QUERY_LIMIT':
                raise Exception('Over your quota for the day!')
            return None
        if not from_cache and response:
            self._cache_result(address, data)
        return response

    def InCache(self, loc):
        """Returns True if there is a cached result for the location."""
        return self._check_cache(loc) is not None

    def LocateFromCache(self, loc):
        """Like Locate, but never goes to the network to get a location."""
        data = self._check_cache(loc)
        if not data:
            return None
        return json.loads(data)
if __name__ == '__main__':
    # Debugging aid: show the cache file each command-line argument maps to.
    for location in sys.argv[1:]:
        sys.stdout.write('%s --> %s\n' % (location, _cache_file(location)))