Skip to content

Commit

Permalink
Add ABA routing number recognizer (#348)
Browse files Browse the repository at this point in the history
* aba routing number with checksum digit

* Update test_aba_routing_recognizer.py

Co-authored-by: Omri Mendels <omri374@users.noreply.github.com>
  • Loading branch information
diwu1989 and omri374 committed Aug 6, 2020
1 parent 94d51d4 commit 07a877d
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .aba_routing_recognizer import AbaRoutingRecognizer
from .credit_card_recognizer import CreditCardRecognizer
from .crypto_recognizer import CryptoRecognizer
from .domain_recognizer import DomainRecognizer
Expand All @@ -18,19 +19,20 @@
NLP_RECOGNIZERS = {"spacy": SpacyRecognizer, "stanza": StanzaRecognizer}

__all__ = [
"AbaRoutingRecognizer",
"CreditCardRecognizer",
"CryptoRecognizer",
"DomainRecognizer",
"EmailRecognizer",
"IbanRecognizer",
"IpRecognizer",
"NhsRecognizer",
"SgFinRecognizer",
"SpacyRecognizer",
"StanzaRecognizer",
"NhsRecognizer",
"UsBankRecognizer",
"UsLicenseRecognizer",
"UsItinRecognizer",
"UsLicenseRecognizer",
"UsPassportRecognizer",
"UsPhoneRecognizer",
"UsSsnRecognizer",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from presidio_analyzer import Pattern, PatternRecognizer


class AbaRoutingRecognizer(PatternRecognizer):
    """
    Recognize American Banking Association (ABA) routing numbers.

    Also known as routing transit number (RTN) and used to identify financial
    institutions and process transactions. Candidates are found with the
    regular expressions in ``PATTERNS`` and then confirmed with the ABA
    checksum in ``validate_result``.

    :param patterns: Patterns to use instead of ``PATTERNS`` (falls back to
        ``PATTERNS`` when empty or ``None``).
    :param context: Context words to use instead of ``CONTEXT`` (falls back
        to ``CONTEXT`` when empty or ``None``).
    :param supported_language: Language code passed to the base recognizer.
    :param supported_entity: Entity name passed to the base recognizer.
    :param replacement_pairs: ``(search, replacement)`` string pairs applied
        to the matched text before the checksum is computed. Defaults to
        removing dashes.
    """

    PATTERNS = [
        # Nine consecutive digits: very common shape, hence the low score.
        Pattern("ABA routing number (weak)", r"\b[0123678]\d{8}\b", 0.05),
        # Dash-separated 4-4-1 layout.
        Pattern("ABA routing number", r"\b[0123678]\d{3}-\d{4}-\d\b", 0.3),
    ]

    CONTEXT = [
        "aba",
        "routing",
        "abarouting",
        "association",
        "bankrouting",
    ]

    def __init__(
        self,
        patterns=None,
        context=None,
        supported_language="en",
        supported_entity="ABA_ROUTING_NUMBER",
        replacement_pairs=None,
    ):
        # Set before calling the base constructor so the attribute exists
        # regardless of what the base class does during initialisation.
        self.replacement_pairs = replacement_pairs or [("-", "")]
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
        )

    def validate_result(self, pattern_text):
        """
        Check a regex match against the ABA checksum.

        :param pattern_text: The text matched by one of the patterns.
        :return: ``True`` when the sanitized text is nine digits that satisfy
            the checksum, ``False`` otherwise.
        """
        sanitized_value = self.__sanitize_value(
            pattern_text, self.replacement_pairs
        )
        return self.__checksum(sanitized_value)

    @staticmethod
    def __checksum(sanitized_value):
        """
        Return whether ``sanitized_value`` passes the 3-7-1 weighted checksum.

        Values that are not exactly nine decimal digits are rejected instead
        of raising, which can happen when custom patterns or replacement
        pairs leave separators or a different number of characters behind.
        """
        weights = (3, 7, 1, 3, 7, 1, 3, 7, 1)
        if len(sanitized_value) != len(weights):
            return False
        # isdecimal() accepts exactly the characters int() can convert, so
        # the conversion below cannot raise ValueError.
        if not sanitized_value.isdecimal():
            return False
        total = sum(
            int(digit) * weight
            for digit, weight in zip(sanitized_value, weights)
        )
        return total % 10 == 0

    @staticmethod
    def __sanitize_value(text, replacement_pairs):
        """Apply every ``(search, replacement)`` pair to ``text`` in order."""
        for search_string, replacement_string in replacement_pairs:
            text = text.replace(search_string, replacement_string)
        return text
5 changes: 4 additions & 1 deletion presidio-analyzer/tests/data/context_sentences_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ my DRIVER LICENSE is: 7774567901234
US_DRIVER_LICENSE
my DrIvEr LiCeNsE is: 7774567901234

ABA_ROUTING_NUMBER
routing number is: 101205681

US_BANK_NUMBER
my bank account number is 912803456

Expand Down Expand Up @@ -90,4 +93,4 @@ Special NRIC numbers e.g. S0000001I that are numerically significant have been i

# Verify SG NRIC/FIN mixed case (e.g. lower case )
FIN
my fin is g3300299L
my fin is g3300299L
38 changes: 38 additions & 0 deletions presidio-analyzer/tests/test_aba_routing_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from tests import assert_result
from presidio_analyzer.predefined_recognizers import AbaRoutingRecognizer


@pytest.fixture(scope="module")
def recognizer():
    """Provide one AbaRoutingRecognizer shared by the tests in this module."""
    aba_recognizer = AbaRoutingRecognizer()
    return aba_recognizer


@pytest.fixture(scope="module")
def entities():
    """Provide the list of entity names requested from the recognizer."""
    requested_entities = ["ABA_ROUTING_NUMBER"]
    return requested_entities


@pytest.mark.parametrize(
    "text, expected_len, expected_positions, expected_score",
    [
        # Bank of America
        ("121000358", 1, ((0, 9),), 1.0),
        # Chase
        ("3222-7162-7", 1, ((0, 11),), 1.0),
        # Wells Fargo
        ("121042882", 1, ((0, 9),), 1.0),
        ("0711-0130-7", 1, ((0, 11),), 1.0),
        # invalid ABA numbers
        ("421042111", 0, (), -1.0),
        ("1234-0000-0", 0, (), -1.0),
    ],
)
def test_aba_routing_numbers(
    text, expected_len, expected_positions, expected_score, recognizer, entities
):
    """Check the number, span and score of results for each sample text."""
    found = recognizer.analyze(text, entities)
    assert len(found) == expected_len
    # Pair each result with its expected (start, end) span, in order.
    for result, span in zip(found, expected_positions):
        start, end = span
        assert_result(result, entities[0], start, end, expected_score)
13 changes: 9 additions & 4 deletions presidio-analyzer/tests/test_context_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from presidio_analyzer import PatternRecognizer, Pattern
from presidio_analyzer.predefined_recognizers import (
AbaRoutingRecognizer,
# CreditCardRecognizer,
UsPhoneRecognizer,
# DomainRecognizer,
Expand All @@ -23,6 +24,7 @@ def recognizers():
"IP_ADDRESS": IpRecognizer(),
"US_SSN": UsSsnRecognizer(),
"PHONE_NUMBER": UsPhoneRecognizer(),
"ABA_ROUTING_NUMBER": AbaRoutingRecognizer(),
"US_ITIN": UsItinRecognizer(),
"US_DRIVER_LICENSE": UsLicenseRecognizer(),
"US_BANK_NUMBER": UsBankRecognizer(),
Expand Down Expand Up @@ -58,9 +60,9 @@ def dataset(recognizers):
raise ValueError(f"bad entity type {entity_type}")

test_items.append((item, recognizer, [entity_type]))
# Currently we have 27 sentences, this is a sanity check
if not len(test_items) == 27:
raise ValueError(f"expected 27 context sentences but found {len(test_items)}")
# Currently we have 28 sentences, this is a sanity check
if not len(test_items) == 28:
raise ValueError(f"expected 28 context sentences but found {len(test_items)}")

yield test_items

Expand All @@ -79,7 +81,10 @@ def test_text_with_context_improves_score(dataset, nlp_engine, mock_nlp_artifact

assert len(results_without_context) == len(results_with_context)
for res_wo, res_w in zip(results_without_context, results_with_context):
assert res_wo.score < res_w.score
if res_wo.score != 1.0:
assert res_wo.score < res_w.score
else:
assert res_wo.score <= res_w.score


def test_context_custom_recognizer(nlp_engine, mock_nlp_artifacts):
Expand Down

0 comments on commit 07a877d

Please sign in to comment.