Skip to content

Commit

Permalink
Validate the length of the matched address
Browse files Browse the repository at this point in the history
  • Loading branch information
michielgerritsen committed Feb 11, 2022
1 parent 8533fe4 commit 8832f5d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
17 changes: 16 additions & 1 deletion src/PrecisionAddressExtraction.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class PrecisionAddressExtraction implements AddressExtractionInterface
public function process(array $address): AddressExtractionResult
{
$this->input = implode(' ', $address);
$normalizedAddress = strtolower(trim($this->input));
$normalizedAddress = mb_strtolower(trim($this->input));

if ($match = $this->hasExactMatch($normalizedAddress)) {
return $this->getResult($match);
Expand Down Expand Up @@ -124,6 +124,7 @@ private function tryByNonExactMatch(string $normalizedAddress): ?string
$possibleStreet = $this->replaceAbbreviations(implode(' ', $parts));

if ($validatedStreet = $this->validStreet($possibleStreet)) {
$this->validateStreet($normalizedAddress, $validatedStreet);
return $validatedStreet;
}
} while(count($parts));
Expand Down Expand Up @@ -164,4 +165,18 @@ private function replaceAbbreviations(string $address): string
// If the address ends with "st." or "str.", replace it with "straat".
return preg_replace('/(st\.|str\.)$/', 'straat', $address);
}

/**
 * Sanity-check a matched street against the input it was extracted from.
 *
 * Runs of digits are removed from the normalized input and the surrounding whitespace is trimmed; the
 * remaining character count is then compared with the character count of the matched street. When the
 * input is more than 2 characters longer than the match, the match is rejected. This prevents, for
 * example, "rue du plat d‘étain" from being matched as "run".
 *
 * @throws AddressExtractionError When the input (without digits) is more than 2 characters longer than the match.
 */
private function validateStreet(string $normalizedAddress, string $validatedStreet): void
{
    $inputWithoutDigits = trim(preg_replace('/[0-9]+/', '', $normalizedAddress));

    // Lengths are measured in characters (mb_strlen), not bytes.
    $lengthDifference = mb_strlen($inputWithoutDigits) - mb_strlen($validatedStreet);

    if ($lengthDifference <= 2) {
        return;
    }

    throw new AddressExtractionError('We extracted an address but the difference is too big');
}
}
8 changes: 8 additions & 0 deletions tests/CombinedAddressExtractionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ public function addressProvider()
['Prof. C. Eijkmanstr.12'],
new AddressExtractionResult('Prof. C. Eijkmanstraat', '12', '')
],
'Address with special character (1)' => [
["rue du plat d‘étain", '1'],
new AddressExtractionResult("rue du plat d‘étain", '1', '')
],
'Address with special character (2)' => [
["rue du plat d'etain", '1'],
new AddressExtractionResult("rue du plat d'etain", '1', '')
],
];
}

Expand Down

0 comments on commit 8832f5d

Please sign in to comment.