Skip to content

Commit 9ece0bc

Browse files
committed
Improve standardize_address function and nom_afnor_clause
1 parent df8648a commit 9ece0bc

File tree

1 file changed

+18
-25
lines changed

1 file changed

+18
-25
lines changed

geocoder/addresses_matcher.py

Lines changed: 18 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -230,30 +230,18 @@ def standardize_address(address):
230230
"R": "RUE",
231231
}
232232

233-
# Define a regex pattern to match a number at the beginning of the address
234-
pattern_number = re.compile(r'^(\d+)\b')
235-
236-
# Define a regex pattern to match the first or second word in the address
237-
pattern_word = re.compile(r'\b(\w+)\b')
238-
239-
# Check if the address starts with a number
240-
if pattern_number.match(address):
241-
# TODO Find and replace some word like " GAL " in address (Split more 2 parts, and parse all parts...
242-
# If it does, split the address and replace the second word
243-
parts = address.split(maxsplit=2) # Split into three parts: number, word to replace, the rest
244-
if len(parts) > 1 and parts[1].upper() in replacements:
245-
for val in replacements:
246-
if parts[1].upper() == val:
247-
parts[1] = replacements[val].capitalize() # Replace the second word if it's in the replacements
233+
parts = address.split()
234+
if len(parts) > 1:
235+
n = 0
236+
while n != len(parts):
237+
if parts[n].upper() in replacements:
238+
for val in replacements:
239+
if parts[n].upper() == val:
240+
parts[n] = replacements[val].capitalize()
241+
n = n + 1
248242
standardized_address = ' '.join(parts)
249243
else:
250-
# If it doesn't start with a number, replace the first word
251-
match = pattern_word.search(address)
252-
if match and match.group(0) in replacements:
253-
standardized_address = pattern_word.sub(replacements[match.group(0)], address, count=1)
254-
else:
255-
standardized_address = address
256-
244+
standardized_address = address
257245
return standardized_address
258246

259247
def geocode_addresses(self, input_file):
@@ -317,10 +305,15 @@ def geocode_addresses(self, input_file):
317305
# Create a nom_afnor WHERE clause to match any entry starting with the keyword
318306
nom_afnor_clause = '1=1' # Default to true if no keyword is matched
319307
for keyword in keywords:
320-
upper_keyword = keyword.upper()
321-
if upper_keyword in standardized_address.upper():
308+
if keyword.upper() in standardized_address.upper():
322309
# Upper-case the keyword in Python before building the LIKE pattern (no SQL UPPER() is applied to nom_afnor)
323-
nom_afnor_clause = f"nom_afnor LIKE '{keyword.upper()}%'"
310+
nom_afnor_clause = f"nom_afnor LIKE '%{keyword.upper()}%"
311+
pattern_word = re.compile(r'\b(\w+)\b \d+')
312+
m = re.search(pattern_word, standardized_address)
313+
if m is not None:
314+
nom_afnor_clause = f"{nom_afnor_clause}{m.group(1).upper()}%'"
315+
else:
316+
nom_afnor_clause = f"{nom_afnor_clause}'"
324317
break # Stop after the first match
325318

326319
# If a starting number is found, include it in the SQL WHERE clause

0 commit comments

Comments
 (0)