@@ -230,30 +230,18 @@ def standardize_address(address):
230
230
"R" : "RUE" ,
231
231
}
232
232
233
- # Define a regex pattern to match a number at the beginning of the address
234
- pattern_number = re .compile (r'^(\d+)\b' )
235
-
236
- # Define a regex pattern to match the first or second word in the address
237
- pattern_word = re .compile (r'\b(\w+)\b' )
238
-
239
- # Check if the address starts with a number
240
- if pattern_number .match (address ):
241
- # TODO Find and replace some word like " GAL " in address (Split more 2 parts, and parse all parts...
242
- # If it does, split the address and replace the second word
243
- parts = address .split (maxsplit = 2 ) # Split into three parts: number, word to replace, the rest
244
- if len (parts ) > 1 and parts [1 ].upper () in replacements :
245
- for val in replacements :
246
- if parts [1 ].upper () == val :
247
- parts [1 ] = replacements [val ].capitalize () # Replace the second word if it's in the replacements
233
+ parts = address .split ()
234
+ if len (parts ) > 1 :
235
+ n = 0
236
+ while n != len (parts ):
237
+ if parts [n ].upper () in replacements :
238
+ for val in replacements :
239
+ if parts [n ].upper () == val :
240
+ parts [n ] = replacements [val ].capitalize ()
241
+ n = n + 1
248
242
standardized_address = ' ' .join (parts )
249
243
else :
250
- # If it doesn't start with a number, replace the first word
251
- match = pattern_word .search (address )
252
- if match and match .group (0 ) in replacements :
253
- standardized_address = pattern_word .sub (replacements [match .group (0 )], address , count = 1 )
254
- else :
255
- standardized_address = address
256
-
244
+ standardized_address = address
257
245
return standardized_address
258
246
259
247
def geocode_addresses (self , input_file ):
@@ -317,10 +305,15 @@ def geocode_addresses(self, input_file):
317
305
# Create a nom_afnor WHERE clause to match any entry starting with the keyword
318
306
nom_afnor_clause = '1=1' # Default to true if no keyword is matched
319
307
for keyword in keywords :
320
- upper_keyword = keyword .upper ()
321
- if upper_keyword in standardized_address .upper ():
308
+ if keyword .upper () in standardized_address .upper ():
322
309
# Upper-case the keyword in Python before embedding it in the LIKE pattern (no SQL UPPER() is applied to nom_afnor here)
323
- nom_afnor_clause = f"nom_afnor LIKE '{ keyword .upper ()} %'"
310
+ nom_afnor_clause = f"nom_afnor LIKE '%{ keyword .upper ()} %"
311
+ pattern_word = re .compile (r'\b(\w+)\b \d+' )
312
+ m = re .search (pattern_word , standardized_address )
313
+ if m is not None :
314
+ nom_afnor_clause = f"{ nom_afnor_clause } { m .group (1 ).upper ()} %'"
315
+ else :
316
+ nom_afnor_clause = f"{ nom_afnor_clause } '"
324
317
break # Stop after the first match
325
318
326
319
# If a starting number is found, include it in the SQL WHERE clause
0 commit comments