Skip to content

Commit

Permalink
return results with max match
Browse files (browse the repository at this point in the history)
  • Loading branch information
cswbrian committed Aug 12, 2018
1 parent 729727f commit 014bbc3
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5,287 deletions.
69 changes: 42 additions & 27 deletions addressParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ class Phrases:
def __init__(self):
text = open("data/region.chi", "r", encoding='utf8').read()
regions = text.split("\n")
<<<<<<< HEAD
phrases = [ (region, "r") for region in regions ]
text = open("data/subDistrict.chi", "r", encoding='utf8').read()
subDistricts = text.split("\n")
Expand All @@ -31,27 +30,6 @@ def __init__(self):
phrases.sort( key=lambda t: t[0] )
self._phrases = phrases
self._keys = [phrase[0] for phrase in phrases]
=======
phases = [ (region, "r") for region in regions ]
text = open("data/subDistrict.chi", "r", encoding='utf8').read()
subDistricts = text.split("\n")
phases += [ (subDistrict, "sd") for subDistrict in subDistricts ]
text = open("data/street.chi", "r", encoding='utf8').read()
streets = text.split("\n")
phases += [ ( street, "s" ) for street in streets ]
text = open("data/building.chi", "r", encoding='utf8').read()
buildings = text.split("\n")
phases += [ ( building, "b" ) for building in buildings ]
text = open("data/estate.chi", "r", encoding='utf8').read()
estates = text.split("\n")
phases += [ ( estate, 'e') for estate in estates ]
text = open("data/village.chi", "r", encoding='utf8').read()
villages = text.split("\n")
phases += [ ( village, 'v') for village in villages ]
phases.sort( key=lambda t: t[0] )
self._phases = phases
self._keys = [phase[0] for phase in phases]
>>>>>>> 9cdf66650c37488ce826d62a68d490ab83ed6821

def searchPhrase(self, string):
idx = bisect.bisect_right ( self._keys, string )
Expand Down Expand Up @@ -93,12 +71,27 @@ def queryOGCIO(self, RequestAddress, n):
soup = BeautifulSoup(r.content, 'html.parser')
return(json.loads(str(soup))['SuggestedAddress'])

class Utilities:
    """Stateless helper routines used by the address-matching script."""

    def flattenJSON(self, data, json_items=None):
        """Collapse a nested mapping into a single-level dict of its leaves.

        Every value that exposes an ``items`` attribute is treated as a
        nested mapping and descended into; every other value is copied into
        the result under its own (innermost) key. Keys of the intermediate
        mappings are discarded.

        NOTE: leaf keys are not qualified by their parent, so when the same
        leaf key occurs in more than one branch, the one visited last wins.

        Args:
            data: Mapping to flatten (anything providing ``.items()``).
            json_items: Optional dict to collect the leaves into. It is
                mutated in place. When omitted, a fresh dict is created for
                this call.

        Returns:
            The accumulator dict (``json_items`` itself when one was given).
        """
        # Create the accumulator here rather than as a default argument so
        # that separate calls never share state.
        if json_items is None:
            json_items = {}
        for key, value in data.items():
            if hasattr(value, 'items'):
                # Nested mapping: recurse, collecting into the same dict.
                self.flattenJSON(value, json_items)
            else:
                json_items[key] = value
        return json_items






if __name__ == "__main__":
# Tokenizer
ph = Phrases()
ut = Utilities()

address = sys.argv[1]
print (json.dumps(ph.parseAddress(address), ensure_ascii=False))

# Look for OGCIO Result
session = requests.Session()
Expand All @@ -108,7 +101,29 @@ def queryOGCIO(self, RequestAddress, n):
"Accept-Encoding":"gzip"
}

for p in ph.parseAddress(address):
if (p[1] == 'b'):
for addr in (ph.queryOGCIO(p[0],10)): # Return 10 results
print(addr)
# Tokenize the input address once and echo the chunks for inspection.
# NOTE(review): parseAddress is not shown here; each chunk is presumably a
# (phrase, tag) pair -- only p[0] is used below. Confirm against Phrases.
parsedchunks = ph.parseAddress(address)
print(parsedchunks)
print("================================================================================================")

# One flattened record per OGCIO result, each annotated with a 'matchCount'.
possibleResults = []

# NOTE: the loop variable rebinds `address` (the input string) to each result
# record. The query itself still receives the input string, because the
# iterable is evaluated before the first rebinding. `idx` is unused.
# NOTE(review): 200 is presumably the number of results requested -- confirm
# against queryOGCIO.
for idx, address in enumerate(ph.queryOGCIO(address,200)): # Loop OGCIO results
addr = address['Address']['PremisesAddress']['ChiPremisesAddress']
flatOGCIO = (ut.flattenJSON(addr, {}))
# Collect the flattened fields whose value is exactly equal to the first
# element of some parsed chunk; keyed by field name, so each field counts once.
match = {}
for key, value in flatOGCIO.items():
for p in reversed(parsedchunks):
if (p[0] == value):
d = {key: value}
match.update(d)

# The score is stored inside the record itself, so it is printed with it below.
flatOGCIO['matchCount'] = len(match)
possibleResults.append(flatOGCIO)

matchCounts = [x['matchCount'] for x in possibleResults]

# Print every record tied for the highest score. max() is re-evaluated on each
# iteration; it is never reached when possibleResults is empty.
for r in possibleResults:
if (r['matchCount'] == max(matchCounts)):
print(r)


Loading

0 comments on commit 014bbc3

Please sign in to comment.