From c89bf3267073662922374675004fe45d150a47b8 Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Wed, 1 Jan 2020 11:34:29 +0200 Subject: [PATCH] Analyzer container size reduction + faster builds (#252) * updated spacy model version + demo text * removed lazy loading of spacy models, ignoring deprecation warnings on tests * Changed base image from Alpine to Slim Buster to support faster pip installations, updated Spacy model version to 2.2.5 * Removed PIP cache to reduce image size * Updated base image to use specific venv * refined logger, fixed unit tests, addressed linting issues * Printing model version * verbose asserts for demo text * Update recognizers_store_api.py which currently throws an exception of type in logger --- Dockerfile.python.deps | 21 +- Makefile | 6 +- presidio-analyzer/Dockerfile | 17 +- presidio-analyzer/Pipfile | 4 +- presidio-analyzer/Pipfile.lock | 800 ++++++++++-------- presidio-analyzer/analyzer/__init__.py | 34 +- presidio-analyzer/analyzer/__main__.py | 30 +- presidio-analyzer/analyzer/analyzer_engine.py | 16 +- presidio-analyzer/analyzer/app_tracer.py | 4 +- .../analyzer/entity_recognizer.py | 6 +- .../analyzer/nlp_engine/spacy_nlp_engine.py | 13 +- .../predefined_recognizers/__init__.py | 44 +- .../domain_recognizer.py | 3 +- .../email_recognizer.py | 3 +- .../us_bank_recognizer.py | 4 +- .../us_passport_recognizer.py | 3 +- .../{logger.py => presidio_logger.py} | 7 +- .../recognizers_store_api.py | 2 +- .../analyzer/recognizer_result.py | 35 + presidio-analyzer/setup.py | 2 +- presidio-analyzer/tests/__init__.py | 8 +- .../tests/test_analyzer_engine.py | 101 ++- .../tests/test_context_support.py | 7 +- .../tests/test_credit_card_recognizer.py | 4 +- .../tests/test_crypto_recognizer.py | 2 +- .../tests/test_domain_recognizer.py | 4 +- .../tests/test_email_recognizer.py | 2 +- .../tests/test_iban_recognizer.py | 222 ++--- presidio-analyzer/tests/test_ip_recognizer.py | 2 +- .../tests/test_pattern_recognizer.py | 2 +- .../tests/test_recognizer_registry.py | 7 +- .../tests/test_spacy_recognizer.py | 54 +- .../tests/test_uk_nhs_recognizer.py | 2 +- .../tests/test_us_bank_recognizer.py | 2 +- .../test_us_driver_license_recognizer.py | 4 +- .../tests/test_us_itin_recognizer.py | 2 +- .../tests/test_us_passport_recognizer.py | 2 +- .../tests/test_us_phone_recognizer.py | 2 +- .../tests/test_us_ssn_recognizer.py | 69 +- pytest.ini | 5 +- 40 files changed, 859 insertions(+), 698 deletions(-) rename presidio-analyzer/analyzer/{logger.py => presidio_logger.py} (94%) diff --git a/Dockerfile.python.deps b/Dockerfile.python.deps index bd286c542..bbd32f188 100644 --- a/Dockerfile.python.deps +++ b/Dockerfile.python.deps @@ -1,26 +1,31 @@ -FROM python:3.7.1-alpine3.8 +FROM python:3.7-slim ARG re2_version="2018-12-01" ARG NAME=presidio-analyzer + +ENV PIP_NO_CACHE_DIR true + COPY ./${NAME}/Pipfile /usr/bin/${NAME}/Pipfile COPY ./${NAME}/Pipfile.lock /usr/bin/${NAME}/Pipfile.lock WORKDIR /usr/bin/${NAME} -RUN apk --update add --no-cache g++ && \ - apk --update add --no-cache --virtual build_deps make tar wget clang && \ +RUN apt-get update -qq \ + && DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \ + wget build-essential && \ wget -O re2.tar.gz https://github.com/google/re2/archive/${re2_version}.tar.gz && \ mkdir re2 && tar --extract --file "re2.tar.gz" --directory "re2" --strip-components 1 && \ cd re2 && make install && cd .. && rm -rf re2 && rm re2.tar.gz && \ - apk add --virtual build_deps make automake gcc g++ subversion python3-dev + apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/{apt,dpkg,cache,log}/ + # Making sure we have pipenv -RUN pip3 install pipenv +RUN pip install pipenv # Updating setuptools -RUN pip3 install --upgrade setuptools +RUN pip install --upgrade setuptools # Installing specified packages from Pipfile.lock -RUN pipenv sync +RUN bash -c 'PIPENV_VENV_IN_PROJECT=1 pipenv sync' + # Print to screen the installed packages for easy debugging RUN pipenv run pip freeze -RUN apk del build_deps diff --git a/Makefile b/Makefile index 850ad605c..59a8140d3 100644 --- a/Makefile +++ b/Makefile @@ -28,8 +28,8 @@ $(BINS): vendor .PHONY: docker-build-deps docker-build-deps: - -docker pull $(DOCKER_REGISTRY)/$(GOLANG_DEPS):$(PRESIDIO_DEPS_LABEL) ||: - -docker pull $(DOCKER_REGISTRY)/$(PYTHON_DEPS):$(PRESIDIO_DEPS_LABEL) ||: + -docker pull $(DOCKER_REGISTRY)/$(GOLANG_DEPS):$(PRESIDIO_DEPS_LABEL) || echo "\nCould not pull base Go image from registry, building locally. If you planned to build locally, the previous error message could be ignored\n" + -docker pull $(DOCKER_REGISTRY)/$(PYTHON_DEPS):$(PRESIDIO_DEPS_LABEL) || echo "\nCould not pull base Python image from registry, building locally (If you planned to build images locally, the previous error message could be ignored\n" docker build -t $(DOCKER_REGISTRY)/$(GOLANG_DEPS):$(PRESIDIO_DEPS_LABEL) -f Dockerfile.golang.deps . docker build -t $(DOCKER_REGISTRY)/$(PYTHON_DEPS):$(PRESIDIO_DEPS_LABEL) -f Dockerfile.python.deps . @@ -200,4 +200,4 @@ ifndef HAS_GOMETALINTER endif .PHONY: bootstrap -bootstrap: vendor \ No newline at end of file +bootstrap: vendor diff --git a/presidio-analyzer/Dockerfile b/presidio-analyzer/Dockerfile index 0373c6b2a..03ce4fb88 100644 --- a/presidio-analyzer/Dockerfile +++ b/presidio-analyzer/Dockerfile @@ -1,23 +1,28 @@ ARG REGISTRY=presidio.azurecr.io -ARG PRESIDIO_DEPS_LABEL=latest +ARG PRESIDIO_DEPS_LABEL=latest -FROM ${REGISTRY}/presidio-python-deps:${PRESIDIO_DEPS_LABEL} +FROM ${REGISTRY}/presidio-python-deps:${PRESIDIO_DEPS_LABEL} ARG NAME=presidio-analyzer WORKDIR /usr/bin/${NAME} ADD ./${NAME} /usr/bin/${NAME} -RUN pipenv install --dev --sequential && \ - pipenv run pylint analyzer && \ +# Print venv information +RUN pipenv --venv +RUN pipenv run pip freeze + +RUN pipenv install pylint==2.3.1 flake8 pytest --skip-lock + +RUN pipenv run pylint analyzer && \ pipenv run flake8 analyzer --exclude "*pb2*.py" && \ pipenv run pytest --log-cli-level=0 #---------------------------- -FROM ${REGISTRY}/presidio-python-deps:${PRESIDIO_DEPS_LABEL} +FROM ${REGISTRY}/presidio-python-deps:${PRESIDIO_DEPS_LABEL} ARG NAME=presidio-analyzer ADD ./${NAME}/analyzer /usr/bin/${NAME}/analyzer WORKDIR /usr/bin/${NAME}/analyzer -CMD pipenv run python __main__.py serve --env-grpc-port \ No newline at end of file +CMD pipenv run python __main__.py serve --env-grpc-port diff --git a/presidio-analyzer/Pipfile b/presidio-analyzer/Pipfile index 61f9d1252..3aa8206fc 100644 --- a/presidio-analyzer/Pipfile +++ b/presidio-analyzer/Pipfile @@ -5,8 +5,8 @@ name = "pypi" [packages] cython = "*" -spacy = "*" -en_core_web_lg = {file = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.1.0/en_core_web_lg-2.1.0.tar.gz"} +spacy = "==2.2.3" +en_core_web_lg = {file = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz"} regex = "*" pyre2 = {file = "https://github.com/torosent/pyre2/archive/release/0.2.23.zip"} grpcio = "*" diff --git a/presidio-analyzer/Pipfile.lock b/presidio-analyzer/Pipfile.lock index 0eb5b069a..761a43718 100644 --- a/presidio-analyzer/Pipfile.lock +++ b/presidio-analyzer/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "be7df2b6a129090a66e0049544cdeae3425b032a6333d2d2991aa8e0e26725d2" + "sha256": "a52cd3363894ab3e63ced494d26c90ba875218105cda094d30dc3ecffa105d72" }, "pipfile-spec": 6, "requires": {}, @@ -16,35 +16,45 @@ "default": { "argcomplete": { "hashes": [ - "sha256:59909d0ce5be1a46e2fb4e4fa5b714f6d605151ce88c468afb42d800879a6e6d", - "sha256:94423d1a56cdec2ef47699e02c9a48cf8827b9c4465b836c0cefb30afe85e59a" + "sha256:52a08b426bd0b03b6881182dd84149b2493540d1c3109ccf9f09f78e4459e387", + "sha256:783d6a12c6c84a33653dc5bac4d6c0640ba64d1037c2662acd9dbe410c26056f" ], - "version": "==1.9.5" + "version": "==1.11.0" }, "blis": { "hashes": [ - "sha256:039129410a338be8db8cf48c54334bd7c30da7e72bad2741e59313b1d242814b", - "sha256:058f9109aaea9d4f88cb623a44994d96c8cf36448de3e1bd30210628d6b52e9e", - "sha256:278d7b95e56cf82a6bef91cd8283eadc9401f2d3bdbbf2cdfdb605cf9081c36e", - "sha256:2d4ca1508fd6229c7994fc17ba324083a5b83f66612c8ea62623a41a1768b030", - "sha256:51a54bad6175e9b154beeb628a879ed492ee2247c9e40c77bdf6fc772145130c", - "sha256:886b313f96d4e268a0587e98c1637d963c73defa8de51e2e6b0d0bd00f16afbb", - "sha256:9f12e6f1e4b10dbb1e0e34e98f60e8435058a60d544a009cb761351fe1d12cad", - "sha256:a54d4fa1908d586f8bce9851a453cb89d1542e9aca65b8b88e9bb9432d626f80", - "sha256:b9d6cef13d95e3752320cd942df25e09160a6f9dfc3d7b41af7cdc772ab18270", - "sha256:d571464d195a950e60bf1547c8914d4da50952e06a0f38cea7b0829d0a4b985a", - "sha256:d616d64c85e6be92d69a1410dc58146cb9603fd1eb148f9ee512b8fddfd789f6", - "sha256:e477c7eaacf7dcccbb190a29559579efb287ecf5c2a9a7a6f9acb0452899f033", - "sha256:e6ae1986625af86f90f111f9d2d284b9e45fddfe56cf40524cdd9417a6a33b87" - ], - "version": "==0.2.4" + "sha256:00473602629ba69fe6565108e21957e918cb48b59f5bf2f6bfb6e04de42500cb", + "sha256:03c368c9716ca814c436550a5f1e02ccf74850e613602519e3941d212e5aa177", + "sha256:135450caabc8aea9bb9250329ebdf7189982d9b57d5c92789b2ba2fe52c247a7", + "sha256:1402d9cbb0fbc21b749dd5b87d7ee14249e74a0ca38be6ecc56b3b356fca2f21", + "sha256:26b16d6005bb2671699831b5cc699905215d1abde1ec5c1d04de7dcd9eb29f75", + "sha256:3347a4b1b7d3ae14476aac9a6f7bf8ebf464863f4ebf4aea228874a7694ea240", + "sha256:38fe877a4b52e762f5e137a412e3c256545a696a12ae8c40d67b8815d2bb5097", + "sha256:4fb89c47ee06b58a4410a16fd5794847517262c9d2a342643475b477dfeff0a4", + "sha256:77a6486b9794af01bcdfd1bc6e067c93add4b93292e6f95bf6e5ce7f98bf0163", + "sha256:856142a11e37fd2c47c5006a3197e157bb8469a491a73d2d442223dd3279df84", + "sha256:8aeaf6954351593a1e412f80e398aa51df588d3c0de74b9f3323b694c603381b", + "sha256:9ede123065f3cacb109967755b3d83d4ca0de90643a9058129a6ab2d4051954f", + "sha256:d1d59faebc1c94f8f4f77154ef4b9d6d40364b111cf8fde48ee3b524c85f1075", + "sha256:d69257d317e86f34a7f230a2fd1f021fd2a1b944137f40d8cdbb23bd334cd0c4", + "sha256:ddd732c5274d1082fa92e2c42317587d5ebabce7741ca98120f69bd45d004b99", + "sha256:f0b0dad4d6268d9dba0a65a9db12dd7a2d8686b648399e4aa1aec7550697e99e" + ], + "version": "==0.4.1" + }, + "catalogue": { + "hashes": [ + "sha256:430ff0e1478ef6d97f81a1bd093d8c4038f88af6d233a05a625f4d87ff030a7e", + "sha256:998329046e952f2e07d606b96e7b2505b40aca1962345398385863781449a69d" + ], + "version": "==0.2.0" }, "certifi": { "hashes": [ - "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", - "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" ], - "version": "==2019.3.9" + "version": "==2019.11.28" }, "chardet": { "hashes": [ @@ -55,113 +65,117 @@ }, "colorama": { "hashes": [ - "sha256:05eed71e2e327246ad6b38c540c4a3117230b19679b875190486ddd2d721422d", - "sha256:f8ac84de7840f5b9c4e3347b3c1eaa50f7e49c2b07596221daec5edaabbd7c48" + "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff", + "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1" ], - "version": "==0.4.1" + "version": "==0.4.3" }, "cymem": { "hashes": [ - "sha256:081c652ae1aff4759813e93a2fc4df4ba410ce214a0e542988e24c62110d4cd0", - "sha256:0e447fa4cb6dccd0b96257a798370a17bef3ec254a527230058e41816a777c04", - "sha256:2c8267dcb15cc6ab318f01ceaf16b8440c0386ae44014d5b22fefe5b0398d05c", - "sha256:46141111eedbb5b0d8c9386b00226a15f5727a1202b9095f4363d425f259267e", - "sha256:4994c1f3e948bd58a6e38c905221680563b851983a15f1f01e5ff415d560d153", - "sha256:584872fd3df176e50c90e37aaca6cb731ac0abcdea4f5b8ad77c30674cfaaa99", - "sha256:6e3194135b21bb268030f3473beb8b674b356c330a9fa185dced2f5006cbd5ba", - "sha256:71710ee0e946a6bd33c86dd9e71f95ad584c65e8bb02615f00ceb0d8348fb303", - "sha256:741957f541fb8322de5a8c711d5d58f80d684225d2aec32fec92484cac931a52", - "sha256:7f01ba6153427811cd7d35630081c69b32c188a1d330599a826ef3bf17edbd7c", - "sha256:8d96e95902e781950d7c255b19364a1ed50a204843d63dd386b0abc5e6df5e44", - "sha256:8dd169ece1629ec4db1a592321e3ae0a9bb62fda2052a351fc36871f314c3569", - "sha256:8e6ad29636edd559b0dfe0a19c5cb5e6257461a5df90839e8c7710ddb005f4b4", - "sha256:9935b233882732f03fd0fadbeb9e9aa672edcdd126e6d52c36d60adf1def8ea5", - "sha256:a38b3229782411e4b23240f5f90000c4e7a834af88ed8763c66f8e4603db6b51", - "sha256:a5966b3171bad9c84a2b19dccda5ab37ae8437c0709a6b72cb42b64ea76a4bd3", - "sha256:ab88b1534f06df07262d9bc5efb3ba07948cdbe9a363eb9eaa4ad42fae6c7b5e", - "sha256:b08b0dd7adafbff9f0fd7dc8dcad5f3ce6f23c126c81ad8d1666880cc94e6974", - "sha256:ba47b571d480c0b76d282ff1634372070031d4998a46ae5d8305d49563b74ca6", - "sha256:bf049dc9cf0d3aa4a48ba514b7f1699fb6f35b18ad8c6f018bd13e0bccd9d30c", - "sha256:c46a122c524a3270ac5249f590ac2f75f1a83692a3d3a03479cea49de72a0a89", - "sha256:c63337aa7e1ad4ec182cc7847c6d85390589fbbf1f9f67d1fde8133a9acb7fa8", - "sha256:ec51273ea08a2c6389bc4dd6b5183354826d916b149a041f2f274431166191bc" - ], - "version": "==2.0.2" + "sha256:5083b2ab5fe13ced094a82e0df465e2dbbd9b1c013288888035e24fd6eb4ed01", + "sha256:622c20a57701d02f01a47e856dea248e112638f28c8249dbe3ed95a9702e3d74", + "sha256:6f4cb689a9552e9e13dccc89203c8ab09f210a7ffb92ce27c384a4a0be27b527", + "sha256:719f04a11ca709fc2b47868070d79fccff77e5d502ff32de2f4baa73cb16166f", + "sha256:7236252bed70f37b898933dcf8aa875d0829664a245a272516f27b30439df71c", + "sha256:7f5ddceb12b73f7fd2e4398266401b6f887003740ccd18c989a2af04500b5f2b", + "sha256:85b9364e099426bd7f445a7705aad87bf6dbb71d79e3802dd8ca14e181d38a33", + "sha256:c288a1bbdf58c360457443e5297e74844e1961e5e7001dbcb3a5297a41911a11", + "sha256:cd21ec48ee70878d46c486e2f7ae94b32bfc6b37c4d27876c5a5a00c4eb75c3c", + "sha256:d7505c500d994f11662e5595f5002251f572acc189f18944619352e2636f5181", + "sha256:dd24848fbd75b17bab06408da6c029ba7cc615bd9e4a1f755fb3a090025fb922", + "sha256:f4f19af4bca81f11922508a9dcf30ce1d2aee4972af9f81ce8e5331a6f46f5e1" + ], + "version": "==2.0.3" }, "cython": { "hashes": [ - "sha256:0ce8f6c789c907472c9084a44b625eba76a85d0189513de1497ab102a9d39ef8", - "sha256:0d67964b747ac09758ba31fe25da2f66f575437df5f121ff481889a7a4485f56", - "sha256:1630823619a87a814e5c1fa9f96544272ce4f94a037a34093fbec74989342328", - "sha256:1a4c634bb049c8482b7a4f3121330de1f1c1f66eac3570e1e885b0c392b6a451", - "sha256:1ec91cc09e9f9a2c3173606232adccc68f3d14be1a15a8c5dc6ab97b47b31528", - "sha256:237a8fdd8333f7248718875d930d1e963ffa519fefeb0756d01d91cbfadab0bc", - "sha256:28a308cbfdf9b7bb44def918ad4a26b2d25a0095fa2f123addda33a32f308d00", - "sha256:2fe3dde34fa125abf29996580d0182c18b8a240d7fa46d10984cc28d27808731", - "sha256:30bda294346afa78c49a343e26f3ab2ad701e09f6a6373f579593f0cfcb1235a", - "sha256:33d27ea23e12bf0d420e40c20308c03ef192d312e187c1f72f385edd9bd6d570", - "sha256:34d24d9370a6089cdd5afe56aa3c4af456e6400f8b4abb030491710ee765bafc", - "sha256:4e4877c2b96fae90f26ee528a87b9347872472b71c6913715ca15c8fe86a68c9", - "sha256:50d6f1f26702e5f2a19890c7bc3de00f9b8a0ec131b52edccd56a60d02519649", - "sha256:55d081162191b7c11c7bfcb7c68e913827dfd5de6ecdbab1b99dab190586c1e8", - "sha256:59d339c7f99920ff7e1d9d162ea309b35775172e4bab9553f1b968cd43b21d6d", - "sha256:6cf4d10df9edc040c955fca708bbd65234920e44c30fccd057ecf3128efb31ad", - "sha256:6ec362539e2a6cf2329cd9820dec64868d8f0babe0d8dc5deff6c87a84d13f68", - "sha256:7edc61a17c14b6e54d5317b0300d2da23d94a719c466f93cafa3b666b058c43b", - "sha256:8e37fc4db3f2c4e7e1ed98fe4fe313f1b7202df985de4ee1451d2e331332afae", - "sha256:b8c996bde5852545507bff45af44328fa48a7b22b5bec2f43083f0b8d1024fd9", - "sha256:bf9c16f3d46af82f89fdefc0d64b2fb02f899c20da64548a8ea336beefcf8d23", - "sha256:c1038aba898bed34ab1b5ddb0d3f9c9ae33b0649387ab9ffe6d0af677f66bfc1", - "sha256:d405649c1bfc42e20d86178257658a859a3217b6e6d950ee8cb76353fcea9c39", - "sha256:db6eeb20a3bd60e1cdcf6ce9a784bc82aec6ab891c800dc5d7824d5cfbfe77f2", - "sha256:e382f8cb40dca45c3b439359028a4b60e74e22d391dc2deb360c0b8239d6ddc0", - "sha256:f3f6c09e2c76f2537d61f907702dd921b04d1c3972f01d5530ef1f748f22bd89", - "sha256:f749287087f67957c020e1de26906e88b8b0c4ea588facb7349c115a63346f67", - "sha256:f86b96e014732c0d1ded2c1f51444c80176a98c21856d0da533db4e4aef54070" + "sha256:03f6bbb380ad0acb744fb06e42996ea217e9d00016ca0ff6f2e7d60f580d0360", + "sha256:05e8cfd3a3a6087aec49a1ae08a89171db991956209406d1e5576f9db70ece52", + "sha256:05eb79efc8029d487251c8a2702a909a8ba33c332e06d2f3980866541bd81253", + "sha256:094d28a34c3fa992ae02aea1edbe6ff89b3cc5870b6ee38b5baeb805dc57b013", + "sha256:0c70e842e52e2f50cc43bad43b5e5bc515f30821a374e544abb0e0746f2350ff", + "sha256:1dcdaa319558eb924294a554dcf6c12383ec947acc7e779e8d3622409a7f7d28", + "sha256:1fc5bdda28f25fec44e4721677458aa509d743cd350862270309d61aa148d6ff", + "sha256:280573a01d9348d44a42d6a9c651d9f7eb1fe9217df72555b2a118f902996a10", + "sha256:298ceca7b0f0da4205fcb0b7c9ac9e120e2dafffd5019ba1618e84ef89434b5a", + "sha256:4074a8bff0040035673cc6dd365a762476d6bff4d03d8ce6904e3e53f9a25dc8", + "sha256:41e7068e95fbf9ec94b41437f989caf9674135e770a39cdb9c00de459bafd1bc", + "sha256:47e5e1502d52ef03387cf9d3b3241007961a84a466e58a3b74028e1dd4957f8c", + "sha256:521340844cf388d109ceb61397f3fd5250ccb622a1a8e93559e8de76c80940a9", + "sha256:6c53338c1811f8c6d7f8cb7abd874810b15045e719e8207f957035c9177b4213", + "sha256:75c2dda47dcc3c77449712b1417bb6b89ec3b7b02e18c64262494dceffdf455e", + "sha256:773c5a98e463b52f7e8197254b39b703a5ea1972aef3a94b3b921515d77dd041", + "sha256:78c3068dcba300d473fef57cdf523e34b37de522f5a494ef9ee1ac9b4b8bbe3f", + "sha256:7bc18fc5a170f2c1cef5387a3d997c28942918bbee0f700e73fd2178ee8d474d", + "sha256:7f89eff20e4a7a64b55210dac17aea711ed8a3f2e78f2ff784c0e984302583dd", + "sha256:89458b49976b1dee5d89ab4ac943da3717b4292bf624367e862e4ee172fcce99", + "sha256:986f871c0fa649b293061236b93782d25c293a8dd8117c7ba05f8a61bdc261ae", + "sha256:a0f495a4fe5278aab278feee35e6102efecde5176a8a74dd28c28e3fc5c8d7c7", + "sha256:a14aa436586c41633339415de82a41164691d02d3e661038da533be5d40794a5", + "sha256:b8ab3ab38afc47d8f4fe629b836243544351cef681b6bdb1dc869028d6fdcbfb", + "sha256:bb487881608ebd293592553c618f0c83316f4f13a64cb18605b1d2fb9fd3da3e", + "sha256:c0b24bfe3431b3cb7ced323bca813dbd13aca973a1475b512d3331fd0de8ec60", + "sha256:c7894c06205166d360ab2915ae306d1f7403e9ce3d3aaeff4095eaf98e42ce66", + "sha256:d4039bb7f234ad32267c55e72fd49fb56078ea102f9d9d8559f6ec34d4887630", + "sha256:e4d6bb8703d0319eb04b7319b12ea41580df44fd84d83ccda13ea463c6801414", + "sha256:e8fab9911fd2fa8e5af407057cb8bdf87762f983cba483fa3234be20a9a0af77", + "sha256:f3818e578e687cdb21dc4aa4a3bc6278c656c9c393e9eda14dd04943f478863d", + "sha256:fe666645493d72712c46e4fbe8bec094b06aec3c337400479e9704439c9d9586" ], "index": "pypi", - "version": "==0.29.7" + "version": "==0.29.14" }, "en-core-web-lg": { - "file": "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.1.0/en_core_web_lg-2.1.0.tar.gz" + "file": "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz" }, "grpcio": { "hashes": [ - "sha256:0442f7d0c527ceab6a76159937ae8109941eace90ec00cb1bd08fc4f3179e52e", - "sha256:051957d0f61f4dec90868a54ee969228409926a0a19fd8ed7b4a0e50388effee", - "sha256:0d262794b2339770d5378a5717f8ddbfb68e409974582f0503272b90b7cc79bd", - "sha256:142693dc8bd427c595d030f75bf8d01c843d9ccb659499e8507ad22da832e9cf", - "sha256:18d44515a3fd3a71442abb5a1c65fc1909d859c13cda50c974cbc69742a80cea", - "sha256:1d50674bdffa18ea6143e0df9a1b97cdeab583ce5dd1cabda3502ee75215065c", - "sha256:3945335a5b8332995415c5f03da1a5f6e36da6ede819a611e2cbb093cf752bdd", - "sha256:3a9603ff14070524f4c69634afad6b280b07ad9f8c2c346c4b2290306e1928ac", - "sha256:52861aac5c1dcf4c841eb555b257cfb56d0c840a286495078382f538d0a34d6a", - "sha256:53c512c7c8af9cb9e3e1cc5ce5e4a5fb2f2e7695e69219f90016bc602abe2f3b", - "sha256:57ea92c9b81015e5f2cc355e53f08a4e661b78a207857311c7b8c55137a43b29", - "sha256:5f8574c9e42d1917e41cdedc6312682a96e4547114c7bb0f3de125199a58b3d6", - "sha256:638ff1a45dd7a226b2b9390296a111142363fe2b5503499f3987d599bce0683c", - "sha256:64fe0dc897f1f19a6500948862857cb3b97247be997bc47b4dbade42f8af5f97", - "sha256:67920ec7d2de89845e5232aed41271ef53e1a362c8ffb84f6a6c6e644a75ce3a", - "sha256:714cddc170efeedf6312d8534ef7f52dcf20dd8f5fb7c5e425c2b6819ac1b9ec", - "sha256:7edf33e929b1666ff68bfc280b9021a862ab423d0e6306889cc2bc7c907dfc27", - "sha256:84eb47b1a47e206e78f453fb92a155ed0d18d2ca8747f5c67e4b50b9c37180a7", - "sha256:8a6289e5c38318cba75115f0bf88be166ead40c83c10dd81ace52f1ab5dc1eab", - "sha256:8bd5b8c3c8872da748dc8810b664699a5f1d49f2c9ab2b205b96ec9fe06741ad", - "sha256:93e7672348d4c68ac570c499a794ff4453a1928c39cbe708472a0e1b77176411", - "sha256:9d37fb214674f0f194a80df5ad0b9c9b9f2fa5c5408ceaf0fc796e57588404d9", - "sha256:9de6746a749634004499bac773ad9877d84d826aca2dc14ba4ebd3cd9f64ed74", - "sha256:9e530c69d6e566ca985193a63363af36a7560a23f4979df6e392bb1bdf05caed", - "sha256:b37f36da8f4d0bf07d53eb34395b68f5e0dc0bcee207affde9ba29bbf6bd6ced", - "sha256:cf9b57d139e44eab294ab31eb0181150d877440a8a321bb4422e2c09f6c7a7d9", - "sha256:dd716aab42be3d1fde74577e42b6319b6399b07d418e49b653e0e1bcd88399bc", - "sha256:dea43aa864edc3b3d8de1f6e40144119fbccdf04525b3ece4fef9392b6eed436", - "sha256:e6cbd27559ff91c98991b8ec4ef19f394bf9056d6897aabb9af79568307181d3", - "sha256:f58e3377da8e8e453068dffc00d17691a97ffd1c3a5a7460b890cf83a9ca6edf", - "sha256:f938fdfb780a0658d04e1d727b4fb470490087c56cb31ba75cb54fb4bea515bd", - "sha256:fee4accad7a113004aef226b851f0494c01fc8d281fdebd74468f19cc45354a0" + "sha256:066630f6b62bffa291dacbee56994279a6a3682b8a11967e9ccaf3cc770fc11e", + "sha256:07e95762ca6b18afbeb3aa2793e827c841152d5e507089b1db0b18304edda105", + "sha256:0a0fb2f8e3a13537106bc77e4c63005bc60124a6203034304d9101921afa4e90", + "sha256:0c61b74dcfb302613926e785cb3542a0905b9a3a86e9410d8cf5d25e25e10104", + "sha256:13383bd70618da03684a8aafbdd9e3d9a6720bf8c07b85d0bc697afed599d8f0", + "sha256:1c6e0f6b9d091e3717e9a58d631c8bb4898be3b261c2a01fe46371fdc271052f", + "sha256:1cf710c04689daa5cc1e598efba00b028215700dcc1bf66fcb7b4f64f2ea5d5f", + "sha256:2da5cee9faf17bb8daf500cd0d28a17ae881ab5500f070a6aace457f4c08cac4", + "sha256:2f78ebf340eaf28fa09aba0f836a8b869af1716078dfe8f3b3f6ff785d8f2b0f", + "sha256:33a07a1a8e817d733588dbd18e567caad1a6fe0d440c165619866cd490c7911a", + "sha256:3d090c66af9c065b7228b07c3416f93173e9839b1d40bb0ce3dd2aa783645026", + "sha256:42b903a3596a10e2a3727bae2a76f8aefd324d498424b843cfa9606847faea7b", + "sha256:4fffbb58134c4f23e5a8312ac3412db6f5e39e961dc0eb5e3115ce5aa16bf927", + "sha256:57be5a6c509a406fe0ffa6f8b86904314c77b5e2791be8123368ad2ebccec874", + "sha256:5b0fa09efb33e2af4e8822b4eb8b2cbc201d562e3e185c439be7eaeee2e8b8aa", + "sha256:5ef42dfc18f9a63a06aca938770b69470bb322e4c137cf08cf21703d1ef4ae5c", + "sha256:6a43d2f2ff8250f200fdf7aa31fa191a997922aa9ea1182453acd705ad83ab72", + "sha256:6d8ab28559be98b02f8b3a154b53239df1aa5b0d28ff865ae5be4f30e7ed4d3f", + "sha256:6e47866b7dc14ca3a12d40c1d6082e7bea964670f1c5315ea0fb8b0550244d64", + "sha256:6edda1b96541187f73aab11800d25f18ee87e53d5f96bb74473873072bf28a0e", + "sha256:7109c8738a8a3c98cfb5dda1c45642a8d6d35dc00d257ab7a175099b2b4daecd", + "sha256:8d866aafb08657c456a18c4a31c8526ea62de42427c242b58210b9eae6c64559", + "sha256:9939727d9ae01690b24a2b159ac9dbca7b7e8e6edd5af6a6eb709243cae7b52b", + "sha256:99fd873699df17cb11c542553270ae2b32c169986e475df0d68a8629b8ef4df7", + "sha256:b6fda5674f990e15e1bcaacf026428cf50bce36e708ddcbd1de9673b14aab760", + "sha256:bdb2f3dcb664f0c39ef1312cd6acf6bc6375252e4420cf8f36fff4cb4fa55c71", + "sha256:bfd7d3130683a1a0a50c456273c21ec8a604f2d043b241a55235a78a0090ee06", + "sha256:c6c2db348ac73d73afe14e0833b18abbbe920969bf2c5c03c0922719f8020d06", + "sha256:cb7a4b41b5e2611f85c3402ac364f1d689f5d7ecbc24a55ef010eedcd6cf460f", + "sha256:cd3d3e328f20f7c807a862620c6ee748e8d57ba2a8fc960d48337ed71c6d9d32", + "sha256:d1a481777952e4f99b8a6956581f3ee866d7614100d70ae6d7e07327570b85ce", + "sha256:d1d49720ed636920bb3d74cedf549382caa9ad55aea89d1de99d817068d896b2", + "sha256:d42433f0086cccd192114343473d7dbd4aae9141794f939e2b7b83efc57543db", + "sha256:d44c34463a7c481e076f691d8fa25d080c3486978c2c41dca09a8dd75296c2d7", + "sha256:d7e5b7af1350e9c8c17a7baf99d575fbd2de69f7f0b0e6ebd47b57506de6493a", + "sha256:d9542366a0917b9b48bab1fee481ac01f56bdffc52437b598c09e7840148a6a9", + "sha256:df7cdfb40179acc9790a462c049e0b8e109481164dd7ad1a388dd67ff1528759", + "sha256:e1a9d9d2e7224d981aea8da79260c7f6932bf31ce1f99b7ccfa5eceeb30dc5d0", + "sha256:ed10e5fad105ecb0b12822f924e62d0deb07f46683a0b64416b17fd143daba1d", + "sha256:f0ec5371ce2363b03531ed522bfbe691ec940f51f0e111f0500fc0f44518c69d", + "sha256:f6580a8a4f5e701289b45fd62a8f6cb5ec41e4d77082424f8b676806dcd22564", + "sha256:f7b83e4b2842d44fce3cdc0d54db7a7e0d169a598751bf393601efaa401c83e0", + "sha256:ffec45b0db18a555fdfe0c6fa2d0a3fceb751b22b31e8fcd14ceed7bde05481e" ], "index": "pypi", - "version": "==1.20.1" + "version": "==1.26.0" }, "idna": { "hashes": [ @@ -170,6 +184,14 @@ ], "version": "==2.8" }, + "importlib-metadata": { + "hashes": [ + "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", + "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" + ], + "markers": "python_version < '3.8'", + "version": "==1.3.0" + }, "jmespath": { "hashes": [ "sha256:3720a4b1bd659dd2eecad0666459b9788813e032b83e7ba58578e48254e0a0e6", @@ -177,20 +199,20 @@ ], "version": "==0.9.4" }, - "jsonschema": { + "knack": { "hashes": [ - "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08", - "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02" + "sha256:b1ac92669641b902e1aef97138666a21b8852f65d83cbde03eb9ddebf82ce121", + "sha256:bd240163d4e2ce9fc8535f77519358da0afd6c0ca19f001c639c3160b57630a9" ], - "version": "==2.6.0" + "index": "pypi", + "version": "==0.6.3" }, - "knack": { + "more-itertools": { "hashes": [ - "sha256:2a4b4d86c4700dd6714e5b4ca6bbca6baf2c827d9de28ca2b66640988c1b6ff4", - "sha256:7f17d4a1b34ea76821d3504f5f0f8c1b75bd9f08497db6a5864677214ac76adc" + "sha256:b84b238cce0d9adad5ed87e745778d20a3f8487d0f0cb8b8a586816c7496458d", + "sha256:c833ef592a0324bcc6a60e48440da07645063c453880c9477ceb22490aec1564" ], - "index": "pypi", - "version": "==0.6.1" + "version": "==8.0.2" }, "murmurhash": { "hashes": [ @@ -200,150 +222,156 @@ "sha256:3af36a0dc9f13f6892d9b8b39a6a3ccf216cae5bce38adc7c2d145677987772f", "sha256:717196a04cdc80cc3103a3da17b2415a8a5e1d0d578b7079259386bf153b3258", "sha256:8a4ed95cd3456b43ea301679c7c39ade43fc18b844b37d0ba0ac0d6acbff8e0c", + "sha256:8b045a79e8b621b4b35b29f29e33e9e0964f3a276f7da4d5736142f322ad4842", "sha256:a6c071b4b498bcea16a8dc8590cad81fa8d43821f34c74bc00f96499e2527073", "sha256:b0afe329701b59d02e56bc6cee7325af83e3fee9c299c615fc1df3202b4f886f", "sha256:ba766343bdbcb928039b8fff609e80ae7a5fd5ed7a4fc5af822224b63e0cbaff", "sha256:bf33490514d308bcc27ed240cb3eb114f1ec31af031535cd8f27659a7049bd52", "sha256:c7a646f6b07b033642b4f52ae2e45efd8b80780b3b90e8092a0cec935fbf81e2", + "sha256:cc97ea766ac545074bab0e5af3dbc48e0d05ba230ae5a404e284d39abe4b3baf", "sha256:d696c394ebd164ca80b5871e2e9ad2f9fdbb81bd3c552c1d5f1e8ee694e6204a", + "sha256:f468e4868f78c3ac202a66abfe2866414bca4ae7666a21ef0938c423de0f7d50", "sha256:fe344face8d30a5a6aa26e5acf288aa2a8f0f32e05efdda3d314b4bf289ec2af" ], "version": "==1.0.2" }, "numpy": { "hashes": [ - "sha256:0e2eed77804b2a6a88741f8fcac02c5499bba3953ec9c71e8b217fad4912c56c", - "sha256:1c666f04553ef70fda54adf097dbae7080645435fc273e2397f26bbf1d127bbb", - "sha256:1f46532afa7b2903bfb1b79becca2954c0a04389d19e03dc73f06b039048ac40", - "sha256:315fa1b1dfc16ae0f03f8fd1c55f23fd15368710f641d570236f3d78af55e340", - "sha256:3d5fcea4f5ed40c3280791d54da3ad2ecf896f4c87c877b113576b8280c59441", - "sha256:48241759b99d60aba63b0e590332c600fc4b46ad597c9b0a53f350b871ef0634", - "sha256:4b4f2924b36d857cf302aec369caac61e43500c17eeef0d7baacad1084c0ee84", - "sha256:54fe3b7ed9e7eb928bbc4318f954d133851865f062fa4bbb02ef8940bc67b5d2", - "sha256:5a8f021c70e6206c317974c93eaaf9bc2b56295b6b1cacccf88846e44a1f33fc", - "sha256:754a6be26d938e6ca91942804eb209307b73f806a1721176278a6038869a1686", - "sha256:771147e654e8b95eea1293174a94f34e2e77d5729ad44aefb62fbf8a79747a15", - "sha256:78a6f89da87eeb48014ec652a65c4ffde370c036d780a995edaeb121d3625621", - "sha256:7fde5c2a3a682a9e101e61d97696687ebdba47637611378b4127fe7e47fdf2bf", - "sha256:80d99399c97f646e873dd8ce87c38cfdbb668956bbc39bc1e6cac4b515bba2a0", - "sha256:88a72c1e45a0ae24d1f249a529d9f71fe82e6fa6a3fd61414b829396ec585900", - "sha256:a4f4460877a16ac73302a9c077ca545498d9fe64e6a81398d8e1a67e4695e3df", - "sha256:a61255a765b3ac73ee4b110b28fccfbf758c985677f526c2b4b39c48cc4b509d", - "sha256:ab4896a8c910b9a04c0142871d8800c76c8a2e5ff44763513e1dd9d9631ce897", - "sha256:abbd6b1c2ef6199f4b7ca9f818eb6b31f17b73a6110aadc4e4298c3f00fab24e", - "sha256:b16d88da290334e33ea992c56492326ea3b06233a00a1855414360b77ca72f26", - "sha256:b78a1defedb0e8f6ae1eb55fa6ac74ab42acc4569c3a2eacc2a407ee5d42ebcb", - "sha256:cfef82c43b8b29ca436560d51b2251d5117818a8d1fb74a8384a83c096745dad", - "sha256:d160e57731fcdec2beda807ebcabf39823c47e9409485b5a3a1db3a8c6ce763e" - ], - "version": "==1.16.3" + "sha256:03bbde29ac8fba860bb2c53a1525b3604a9b60417855ac3119d89868ec6041c3", + "sha256:1baefd1fb4695e7f2e305467dbd876d765e6edd30c522894df76f8301efaee36", + "sha256:1c35fb1131362e6090d30286cfda52ddd42e69d3e2bf1fea190a0fad83ea3a18", + "sha256:3c68c827689ca0ca713dba598335073ce0966850ec0b30715527dce4ecd84055", + "sha256:443ab93fc35b31f01db8704681eb2fd82f3a1b2fa08eed2dd0e71f1f57423d4a", + "sha256:56710a756c5009af9f35b91a22790701420406d9ac24cf6b652b0e22cfbbb7ff", + "sha256:62506e9e4d2a39c87984f081a2651d4282a1d706b1a82fe9d50a559bb58e705a", + "sha256:6f8113c8dbfc192b58996ee77333696469ea121d1c44ea429d8fd266e4c6be51", + "sha256:712f0c32555132f4b641b918bdb1fd3c692909ae916a233ce7f50eac2de87e37", + "sha256:854f6ed4fa91fa6da5d764558804ba5b0f43a51e5fe9fc4fdc93270b052f188a", + "sha256:88c5ccbc4cadf39f32193a5ef22e3f84674418a9fd877c63322917ae8f295a56", + "sha256:905cd6fa6ac14654a6a32b21fad34670e97881d832e24a3ca32e19b455edb4a8", + "sha256:9d6de2ad782aae68f7ed0e0e616477fbf693d6d7cc5f0f1505833ff12f84a673", + "sha256:a30f5c3e1b1b5d16ec1f03f4df28e08b8a7529d8c920bbed657f4fde61f1fbcd", + "sha256:a9d72d9abaf65628f0f31bbb573b7d9304e43b1e6bbae43149c17737a42764c4", + "sha256:ac3cf835c334fcc6b74dc4e630f9b5ff7b4c43f7fb2a7813208d95d4e10b5623", + "sha256:b091e5d4cbbe79f0e8b6b6b522346e54a282eadb06e3fd761e9b6fafc2ca91ad", + "sha256:cc070fc43a494e42732d6ae2f6621db040611c1dde64762a40c8418023af56d7", + "sha256:e1080e37c090534adb2dd7ae1c59ee883e5d8c3e63d2a4d43c20ee348d0459c5", + "sha256:f084d513de729ff10cd72a1f80db468cff464fedb1ef2fea030221a0f62d7ff4", + "sha256:f6a7421da632fc01e8a3ecd19c3f7350258d82501a646747664bae9c6a87c731" + ], + "version": "==1.18.0" }, "plac": { "hashes": [ - "sha256:854693ad90367e8267112ffbb8955f57d6fdeac3191791dc9ffce80f87fd2370", - "sha256:ba3f719a018175f0a15a6b04e6cc79c25fd563d348aacd320c3644d2a9baf89b" + "sha256:398cb947c60c4c25e275e1f1dadf027e7096858fb260b8ece3b33bcff90d985f", + "sha256:487e553017d419f35add346c4c09707e52fa53f7e7181ce1098ca27620e9ceee" ], - "version": "==0.9.6" + "version": "==1.1.3" }, "preshed": { "hashes": [ - "sha256:0c9af79c7b825793f987d477627efb81afd23384ac791bebbc88a257342a77ab", - "sha256:0ebc79431154bc5d12f97b3c93bc350af941702a44f0761dfcd395e970d693f8", - "sha256:102e71dc841c979b2ece44ab05b2b0aa39c8039493ddac40dd22cf23e2484063", - "sha256:15145b24eded01426544be829a6395d6c99e2d62f5f3b88a6e19087ebeef7237", - "sha256:195674dfb4bcf18b26e448feaabdf61adcf028ae69ecaa075c0bdfaf62a19671", - "sha256:38f7fbef59f89d3b2c8c3b102f9a7360cd73a33c829fdeb101c615b18ecc4686", - "sha256:3aa411233dc230247ea4c4558062e5b2d59d41c697107a45fddbfe03e63f3e77", - "sha256:3b8c7b607e6dce0843544cfe4f05355db0516fce8eca0c37d6b5f4f3680493bf", - "sha256:4bda4153d46a603bc6ea65380dfa091d46700f664cb906c7f26a469be6c2a503", - "sha256:541d7ed765d67512d6f9fa24fd01cc1d7a51c7ff2646362924f4db46813b485a", - "sha256:593d23b9f851ae7a4d519ca4489dd2b352d833e08f5d35795d42a591b8badb54", - "sha256:7f6fb8f4108abe958af892847ed50abe6f45aaf45a87853cc8154a7203e75d84", - "sha256:7ff7f18af1f19ea666ac4fbf48842e6acd900fbfdc26bb9aad02f353ff932386", - "sha256:9c0d503d8693bf1e08e0fa1cecbcd3253146abaa9a7501d7d583a72edd29fdd1", - "sha256:9cefe818a97134c0ddf22ef76fced1c841ebd137c2895251c5d1310276c234b5", - "sha256:9e603916a95dc524081d54c0a135611e6f68d787185d5df2b5ab3f076c3d1bd4", - "sha256:a2acacceac79aa6d4b65125e20c7de78fbca1340a251854c87967acef1795490", - "sha256:a3d592e7b265b4faf08c9b4d7493b9e8604e0ba8858cc9bd8c9aee41d3df2a3a", - "sha256:b2030e68c6f539e6dd7bfcea032940042739ef05d50a2eb1d7af24e038971b0f", - "sha256:bc894dc14d8567a5d6a1cded0a701da7fbb360b2124237fe8acde85333825aef", - "sha256:c21d4d10cc0248ba3facbbbfbe63211ce921478a3d5db6de34de39ee1b3484e1", - "sha256:dae01c74313965c487e0ec839e5f28d0c7df9bfd1d978aa5bada3f72ff20a9e5", - "sha256:ee8068035684a4b382bebb3a3f270799360545baff9742b85e627a0a889e6850" - ], - "version": "==2.0.1" + "sha256:0c15ae62f2595ca479decc3452967484dae57b510278800f5deb9115238cc818", + "sha256:190345724eb3f7aeaeb2a758740d698bd6c017c2cdf07c71c16b34820973d114", + "sha256:1be3cb59211282e906a11443464fe3e19f6561e2fcd06410e4adc6d45354cf82", + "sha256:1ef72a120e49356058b3c0590d7b5e91f2747b44e006eef6579be6131223cab0", + "sha256:253970beae87ab672a6afb543908761795eea3cb7b0d784e2ea51e265752059e", + "sha256:448d9df12e63fe4a3024f6153ee6703bb95d2be0ce887b5eda7ddc41acfba825", + "sha256:61d73468c97c1d6d5a048de0b01d5a6fd052123358aca4823cdb277e436436cb", + "sha256:633358f1fb0ec5dd6dbe4971c328d08809e5a8dbefdf13a802ae0a7cb45306c7", + "sha256:6518bbd5fb8adbc3231e75ae78d96a7bdd5405a3b23a09d5e62a2e4fc833724e", + "sha256:7e80ffc1fb79496d4feafe0eaf71ee5e532b91daf6cec235d7f9c4c12657a58c", + "sha256:7ea588a78aaf310ae2c293071a8571b07ae434819be05fe510442b6df3f8fbf7", + "sha256:88427346b220293439db77c82913791fa13edc6ac73d8159610699a3ca17aae9", + "sha256:8a9a8222a697a513f25a94733e7a17cc298ecd8fd56b606a1d8fa0ac342c2830", + "sha256:b4ae6c7c44aa3ff7bd717791bb6b619ecb273b7cb128c986f2dc65f6e0e6ddd4", + "sha256:e37058d91bd7f0f5a7a9c83d22a83dc581ab5f79688a87be81f200993145a250", + "sha256:ece5e850f667eaa3367d5c56dda9e3aa6ac1c0bb2117d2f466a26db5f26bbe4b" + ], + "version": "==3.0.2" }, "protobuf": { "hashes": [ - "sha256:21e395d7959551e759d604940a115c51c6347d90a475c9baf471a1a86b5604a9", - "sha256:57e05e16955aee9e6a0389fcbd58d8289dd2420e47df1a1096b3a232c26eb2dd", - "sha256:67819e8e48a74c68d87f25cad9f40edfe2faf278cdba5ca73173211b9213b8c9", - "sha256:75da7d43a2c8a13b0bc7238ab3c8ae217cbfd5979d33b01e98e1f78defb2d060", - "sha256:78e08371e236f193ce947712c072542ff19d0043ab5318c2ea46bbc2aaebdca6", - "sha256:7ee5b595db5abb0096e8c4755e69c20dfad38b2d0bcc9bc7bafc652d2496b471", - "sha256:86260ecfe7a66c0e9d82d2c61f86a14aa974d340d159b829b26f35f710f615db", - "sha256:92c77db4bd33ea4ee5f15152a835273f2338a5246b2cbb84bab5d0d7f6e9ba94", - "sha256:9c7b90943e0e188394b4f068926a759e3b4f63738190d1ab3d500d53b9ce7614", - "sha256:a77f217ea50b2542bae5b318f7acee50d9fc8c95dd6d3656eaeff646f7cab5ee", - "sha256:ad589ed1d1f83db22df867b10e01fe445516a5a4d7cfa37fe3590a5f6cfc508b", - "sha256:b06a794901bf573f4b2af87e6139e5cd36ac7c91ac85d7ae3fe5b5f6fc317513", - "sha256:bd8592cc5f8b4371d0bad92543370d4658dc41a5ccaaf105597eb5524c616291", - "sha256:be48e5a6248a928ec43adf2bea037073e5da692c0b3c10b34f9904793bd63138", - "sha256:cc5eb13f5ccc4b1b642cc147c2cdd121a34278b341c7a4d79e91182fff425836", - "sha256:cd3b0e0ad69b74ee55e7c321f52a98effed2b4f4cc9a10f3683d869de00590d5", - "sha256:d6e88c4920660aa75c0c2c4b53407aef5efd9a6e0ca7d2fc84d79aba2ccbda3a", - "sha256:ec3c49b6d247152e19110c3a53d9bb4cf917747882017f70796460728b02722e" + "sha256:0329e86a397db2a83f9dcbe21d9be55a47f963cdabc893c3a24f4d3a8f117c37", + "sha256:0a7219254afec0d488211f3d482d8ed57e80ae735394e584a98d8f30a8c88a36", + "sha256:14d6ac53df9cb5bb87c4f91b677c1bc5cec9c0fd44327f367a3c9562de2877c4", + "sha256:180fc364b42907a1d2afa183ccbeffafe659378c236b1ec3daca524950bb918d", + "sha256:3d7a7d8d20b4e7a8f63f62de2d192cfd8b7a53c56caba7ece95367ca2b80c574", + "sha256:3f509f7e50d806a434fe4a5fbf602516002a0f092889209fff7db82060efffc0", + "sha256:4571da974019849201fc1ec6626b9cea54bd11b6bed140f8f737c0a33ea37de5", + "sha256:56bd1d84fbf4505c7b73f04de987eef5682e5752c811141b0186a3809bfb396f", + "sha256:680c668d00b5eff08b86aef9e5ba9a705e621ea05d39071cfea8e28cb2400946", + "sha256:6b5b947dc8b3f2aec0eaad65b0b5113fcd642c358c31357c647da6281ee31104", + "sha256:6e96dffaf4d0a9a329e528b353ba62fd9ef13599688723d96bc9c165d0b6871e", + "sha256:919f0d6f6addc836d08658eba3b52be2e92fd3e76da3ce00c325d8e9826d17c7", + "sha256:9c7b19c30cf0644afd0e4218b13f637ce54382fdcb1c8f75bf3e84e49a5f6d0a", + "sha256:a2e6f57114933882ec701807f217df2fb4588d47f71f227c0a163446b930d507", + "sha256:a6b970a2eccfcbabe1acf230fbf112face1c4700036c95e195f3554d7bcb04c1", + "sha256:bc45641cbcdea068b67438244c926f9fd3e5cbdd824448a4a64370610df7c593", + "sha256:d61b14a9090da77fe87e38ba4c6c43d3533dcbeb5d84f5474e7ac63c532dcc9c", + "sha256:d6faf5dbefb593e127463f58076b62fcfe0784187be8fe1aa9167388f24a22a1" ], "index": "pypi", - "version": "==3.7.1" + "version": "==3.11.2" }, "pygments": { "hashes": [ - "sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a", - "sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d" + "sha256:2a3fe295e54a20164a9df49c75fa58526d3be48e14aceba6d6b1e8ac0bfd6f1b", + "sha256:98c8aa5a9f778fcd1026a17361ddaf7330d1b7c62ae97c3bb0ae73e0b9b6b0fe" ], - "version": "==2.3.1" + "version": "==2.5.2" }, "pyre2": { "file": "https://github.com/torosent/pyre2/archive/release/0.2.23.zip" }, "pyyaml": { "hashes": [ - "sha256:1adecc22f88d38052fb787d959f003811ca858b799590a5eaa70e63dca50308c", - "sha256:436bc774ecf7c103814098159fbb84c2715d25980175292c648f2da143909f95", - "sha256:460a5a4248763f6f37ea225d19d5c205677d8d525f6a83357ca622ed541830c2", - "sha256:5a22a9c84653debfbf198d02fe592c176ea548cccce47553f35f466e15cf2fd4", - "sha256:7a5d3f26b89d688db27822343dfa25c599627bc92093e788956372285c6298ad", - "sha256:9372b04a02080752d9e6f990179a4ab840227c6e2ce15b95e1278456664cf2ba", - "sha256:a5dcbebee834eaddf3fa7366316b880ff4062e4bcc9787b78c7fbb4a26ff2dd1", - "sha256:aee5bab92a176e7cd034e57f46e9df9a9862a71f8f37cad167c6fc74c65f5b4e", - "sha256:c51f642898c0bacd335fc119da60baae0824f2cde95b0330b56c0553439f0673", - "sha256:c68ea4d3ba1705da1e0d85da6684ac657912679a649e8868bd850d2c299cce13", - "sha256:e23d0cc5299223dcc37885dae624f382297717e459ea24053709675a976a3e19" + "sha256:0e7f69397d53155e55d10ff68fdfb2cf630a35e6daf65cf0bdeaf04f127c09dc", + "sha256:2e9f0b7c5914367b0916c3c104a024bb68f269a486b9d04a2e8ac6f6597b7803", + "sha256:35ace9b4147848cafac3db142795ee42deebe9d0dad885ce643928e88daebdcc", + "sha256:38a4f0d114101c58c0f3a88aeaa44d63efd588845c5a2df5290b73db8f246d15", + "sha256:483eb6a33b671408c8529106df3707270bfacb2447bf8ad856a4b4f57f6e3075", + "sha256:4b6be5edb9f6bb73680f5bf4ee08ff25416d1400fbd4535fe0069b2994da07cd", + "sha256:7f38e35c00e160db592091751d385cd7b3046d6d51f578b29943225178257b31", + "sha256:8100c896ecb361794d8bfdb9c11fce618c7cf83d624d73d5ab38aef3bc82d43f", + "sha256:c0ee8eca2c582d29c3c2ec6e2c4f703d1b7f1fb10bc72317355a746057e7346c", + "sha256:e4c015484ff0ff197564917b4b4246ca03f411b9bd7f16e02a2f586eb48b6d04", + "sha256:ebc4ed52dcc93eeebeae5cf5deb2ae4347b3a81c3fa12b0b8c976544829396a4" ], - "version": "==5.1" + "version": "==5.2" }, "regex": { "hashes": [ - "sha256:020429dcf9b76cc7648a99c81b3a70154e45afebc81e0b85364457fe83b525e4", - "sha256:0552802b1c3f3c7e4fee8c85e904a13c48226020aa1a0593246888a1ac55aaaf", - "sha256:308965a80b92e1fec263ac1e4f1094317809a72bc4d26be2ec8a5fd026301175", - "sha256:4d627feef04eb626397aa7bdec772774f53d63a1dc7cc5ee4d1bd2786a769d19", - "sha256:93d1f9fcb1d25e0b4bd622eeba95b080262e7f8f55e5b43c76b8a5677e67334c", - "sha256:c3859bbf29b1345d694f069ddfe53d6907b0393fda5e3794c800ad02902d78e9", - "sha256:d56ce4c7b1a189094b9bee3b81c4aeb3f1ba3e375e91627ec8561b6ab483d0a8", - "sha256:ebc5ef4e10fa3312fa1967dc0a894e6bd985a046768171f042ac3974fadc9680", - "sha256:f9cd39066048066a4abe4c18fb213bc541339728005e72263f023742fb912585" + "sha256:032fdcc03406e1a6485ec09b826eac78732943840c4b29e503b789716f051d8d", + "sha256:0e6cf1e747f383f52a0964452658c04300a9a01e8a89c55ea22813931b580aa8", + "sha256:106e25a841921d8259dcef2a42786caae35bc750fb996f830065b3dfaa67b77e", + "sha256:1768cf42a78a11dae63152685e7a1d90af7a8d71d2d4f6d2387edea53a9e0588", + "sha256:27d1bd20d334f50b7ef078eba0f0756a640fd25f5f1708d3b5bed18a5d6bced9", + "sha256:29b20f66f2e044aafba86ecf10a84e611b4667643c42baa004247f5dfef4f90b", + "sha256:4850c78b53acf664a6578bba0e9ebeaf2807bb476c14ec7e0f936f2015133cae", + "sha256:57eacd38a5ec40ed7b19a968a9d01c0d977bda55664210be713e750dd7b33540", + "sha256:724eb24b92fc5fdc1501a1b4df44a68b9c1dda171c8ef8736799e903fb100f63", + "sha256:77ae8d926f38700432807ba293d768ba9e7652df0cbe76df2843b12f80f68885", + "sha256:78b3712ec529b2a71731fbb10b907b54d9c53a17ca589b42a578bc1e9a2c82ea", + "sha256:7bbbdbada3078dc360d4692a9b28479f569db7fc7f304b668787afc9feb38ec8", + "sha256:8d9ef7f6c403e35e73b7fc3cde9f6decdc43b1cb2ff8d058c53b9084bfcb553e", + "sha256:a83049eb717ae828ced9cf607845929efcb086a001fc8af93ff15c50012a5716", + "sha256:adc35d38952e688535980ae2109cad3a109520033642e759f987cf47fe278aa1", + "sha256:c29a77ad4463f71a506515d9ec3a899ed026b4b015bf43245c919ff36275444b", + "sha256:cfd31b3300fefa5eecb2fe596c6dee1b91b3a05ece9d5cfd2631afebf6c6fadd", + "sha256:d3ee0b035816e0520fac928de31b6572106f0d75597f6fa3206969a02baba06f", + "sha256:d508875793efdf6bab3d47850df8f40d4040ae9928d9d80864c1768d6aeaf8e3", + "sha256:ef0b828a7e22e58e06a1cceddba7b4665c6af8afeb22a0d8083001330572c147", + "sha256:faad39fdbe2c2ccda9846cd21581063086330efafa47d87afea4073a08128656" ], "index": "pypi", - "version": "==2019.4.14" + "version": "==2019.12.20" }, "requests": { "hashes": [ - "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e", - "sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b" + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" ], - "version": "==2.21.0" + "version": "==2.22.0" }, "requests-file": { "hashes": [ @@ -354,119 +382,139 @@ }, "six": { "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", + "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" ], - "version": "==1.12.0" + "version": "==1.13.0" }, "spacy": { "hashes": [ - "sha256:0fe2e5905f2f5b41be3ebea40626f70bea567a7a2cda9c244109fffe8d964429", - "sha256:30f0f09074bf115a0384691e8ba3d64aab431192b3095a13312a93d0e8a71c07", - "sha256:6a82612f0e75c11d541002f49375d80b4800c967e5d2b402d5a8dd40b6c57ae6", - "sha256:74066ac969a587d16d00d65318c1baa3c3e9215e6858d0c81ce2823320fe09dc", - "sha256:b1b86ddf6142fa2782b2e0269d040430ae5696eb0224f3e99408897cac7bb506", - "sha256:be8a7c89461ac22d261e19e1d3eb35752d8ff3e52452af076b303561bb166408", - "sha256:e6522e1242a5a5f12ef7e55f74df020b5deea59f7d1e7b6e69298301e3c0badd", - "sha256:eb699f54bf6d131df701e6dbbef9e91b74a065a42c9d2850964282b3c14560bb", - "sha256:f385942c5b2c8cf07e4a56871f88a49d4c8a9145fcd731c455e39fb5af9b12ba" + "sha256:1d14c9e7d65b2cecd56c566d9ffac8adbcb9ce2cff2274cbfdcf5468cd940e6a", + "sha256:2cb77315522cc422df7750dac778f13d8079f409b4842cf74a54ffe3b84ee5c6", + "sha256:3c83c061597b5dc94c939c511d3b72c2971257204f21976afc117a350e8fa92b", + "sha256:6971359e43841ff9ed87e1af5e87ea74d6fdb01fe54807d3e4c6a2a3798d18a4", + "sha256:708d25c7212bd20d1268c6559e191d221e88e68e152fb98b82c388d16dfdd3d7", + "sha256:713811c96396c6bb86a1da2bbbe02d874385e74dde6617a84d61d99e9d2b1105", + "sha256:7fa02ababbb3762277b81873204d78583008b408ddf6fc0ef977b38d3b462b85", + "sha256:8d1ce99fc30d634b63b15d98c49b96d6a40b0d2048d5dad0f2bb31d3f6dc5ef0", + "sha256:9afdec1aeb21dbeccfd4d702f12fe8bab88e4d7cd410785bf17f6b186cbc73e8", + "sha256:ce7fad73de7aed7ca2ee7c2404c77c72005f67ca95edae6f19f08947fb0f8ab3", + "sha256:d6a2804c457ce74f0d3bf1f4cdb00cbcd228e9da5f0bdbbbe0a856afe12db37e", + "sha256:d8791f5f69800d702b8e9457418af2cd29789b82697d17ad66df98922f081d1b" ], "index": "pypi", - "version": "==2.1.3" + "version": "==2.2.3" }, "srsly": { "hashes": [ - "sha256:02ea974c4b80f9ffdea4f953ffece5a8715e4e4b37d09192ab65cf4edfbf74d1", - "sha256:061ade35556e51b2e1da6f8552be7a6327d2d02b69edf0aacc9f5c4319d495f1", - "sha256:1bf6af7a86f34969a3997da09fc8c2f72ee02cd74ff40035e37c2f968776fa23", - "sha256:1e4ef85bf133e384f465865ba4e0a14a52c4f2e4b46c763faf100339a06f09c4", - "sha256:850399e43f4cefdcac7a913363b120ea084cb02fcfdbbde1bd37444804d7def4", - "sha256:977aa6e5fd3f7e9d1c8fe7aeed841dfe3ede75dfce04255d4c670e663faaef2a", - "sha256:abdc5b46866648b123517550582dc4c4b767b816ae54c44e5973bbebc3f0dab4", - "sha256:ac0dbe6e715e1fe3536397a9e65ec8f3c624c99f45b6f30e87d220071ef84721", - "sha256:b8646f0f7cf6fd1de4919ab456d9c030e09e74f741a0cecc941363414109ccdc", - "sha256:b9dc81339c1ab969057e790d7b2a56fd4da87336785bd671c86520e8272e3663", - "sha256:d7c91f59edc2ceeca70adf1b0a46d337234ff4fb7ca2b579ca41885f011b329f", - "sha256:d906a2a3df1cac2cb4bf382b8aaf14e22df2ca3758eba0d3049723c851c8ebf0", - "sha256:ecec49c9cdaae4594011666dd654e1e044e552f63bb3a62a1849c65a92ee302e", - "sha256:ef7897050c04a313f2db99c9bcaf2f0c3c75609677683ca5a6e1e7a515325d72" - ], - "version": "==0.0.5" + "sha256:41f2fe803fe6985eb79982ce5d571b81413adfe2d01dcd470e55c6a0f16e07d8", + "sha256:57a87513ffcf986d0da842241f2f01a2a719cb97c7ecb01d07f14e7c48392eb4", + "sha256:6ec70d50d2a63452faf5b7606120310c4a95f2c24d931bd8f5babae9d1d99412", + "sha256:798010e744469f65b3c492eac77d9b46a47a7bc229428f63e9c1c7445efc1809", + "sha256:8730016fc5ca49dbaf676a8d02b12b184e909a26e596d51f46a6c71a963de462", + "sha256:8ffa7deafac1fb961385eff6feed324b5890b42175e1dde2c3e3fab2034756bb", + "sha256:97e5101d6ff08e5a2ebd83fc31b48c90aad24ba35eb4468f5b7ec56ecd8bdb6b", + "sha256:9e9a395ea53dbac0b705556246d1a9f8e5fea9ba49bc63ec3d3de05bfbe48735", + "sha256:aa02e2a62093ef09d7904343ee7381b9c9bab5b4f06960dfbeaa12035d0f0a3e", + "sha256:c62acffd96b4699820e39fcc47fc5a45ff14432c4665d4112ee08e42aeda047e", + "sha256:d30074fdb05a739358fef33701315f8247161fbdb52f29fca368d10c2ef23fae", + "sha256:d60256e395cc61e85e26b0e3549ed8839e365106f8cd2ed5db43bf79ad5efce4", + "sha256:eca4be587d20a3dfbf45b316ceec5e9f9fd231c3b7d1365a35ce34d34b6e184b", + "sha256:f2f6a950b801352f596667459839235cf059b39307e4034d7ed68e7dfb497bd6", + "sha256:f48623820170eff0e2fc79419688a16f5977916548dd0d3a8d0d3fc93a7978ad" + ], + "version": "==0.2.0" }, "tabulate": { "hashes": [ - "sha256:8af07a39377cee1103a5c8b3330a421c2d99b9141e9cc5ddd2e3263fea416943" + "sha256:5470cc6687a091c7042cee89b2946d9235fe9f6d49c193a4ae2ac7bf386737c8" ], - "version": "==0.8.3" + "version": "==0.8.6" }, "thinc": { "hashes": [ - "sha256:12c003b804fb93c64261a5010df0129f942234adb8f45d489a355a5315e06acf", - "sha256:17f9ada01f1f77a5560bc16ec5a650dca08356b50727ded0df19f0dfb4a32a25", - "sha256:26c9d54ffd90753feebbc462ae59939a9e3d2485ef24ed3dc1861c9b486fdbbe", - "sha256:3258161fc2cefa4082f099dec3748f1dcef5e920df5e9d82258ea6ffec280b9a", - "sha256:38a83b928cdc49c994852538f639b2a889681a0589c44b1a6fc3c899e5f36893", - "sha256:3e76101a733bbb0b97d44bdbcb407678b9e2b487047acb6f4c19b72909a6b12f", - "sha256:412f107c458d2951711b4d3ec53587244cd3acc032944e855f49cf94a1adc36e", - "sha256:4948c10c61e627950900cdccf506eb7398d2b28f33cf72bb4b5d9c5c572925e7", - "sha256:a8b2d7713a7dfc0b18b5c16db58ab6e015df14e4fbed0249ed49e630b2d6a86f", - "sha256:ec99c2c65962157c7ee7b947d29f2775291860b81cba62c5bd9f92fdeca2d137", - "sha256:f2386e66042218f19e511692926cef00a9646a3104d2efddfb5bec7b0388a83b", - "sha256:fc0b37733591315afddee45823d4f6740f9b0567c1ba57a3a3c319669d1fcbad" - ], - "version": "==7.0.4" + "sha256:1dbaec0628040a1f8d66147fadbf7775ad6dfe4c681424b2e20479c1e54dc3c1", + "sha256:20b6ed4a8112342b433b9b3ca23b59322d07e32a9232d3cca19b0353e213eadb", + "sha256:30790a1a496a8a84fe300edf50df50454dbdb625b41b203739fbc03112a4d3b6", + "sha256:56b67887930df87c28af2cc4d046c6bc3e80ed4ff3e57208a4fb7a348d12a580", + "sha256:650fbead603bd7e73a61fd2c1b69202ad7a8eb70d4ebe7c5484b8788e828b6e0", + "sha256:713adad69c108dbdc145276d077c4a80f3df31a39b3fc574782dcb64b1def815", + "sha256:801f32f6c048de7e9f6d406342080e6348d0bb02beb1412811f9150a26661691", + "sha256:8833246f1c8b95143c91e310728bf64af8972a9d8653252efa1b4c9036837569", + "sha256:93cb9d184115a8890321dd7f5d94a0d8235dc2fca54d92a9c1c051234a7af43e", + "sha256:abe0d00cbb2cc831f4462e41f97aeb754b275a723a1335cdce7ac9224001d567", + "sha256:bad16bcc608ec4d74c680d85aa9bf43cfc776ac12ca3b7e699d7283fd0177bca", + "sha256:ce81d6b2372057e10f9d7cb505942df67a803f270d69959d44d372e8e3792bb9", + "sha256:f19a36cdfdbef75109f505313c16a7b154b9bbf83dd177e9ddd43430dc523bb0" + ], + "version": "==7.3.1" }, "tldextract": { "hashes": [ - "sha256:2c1c5d9d454f79734b4f3da0d603856dd9f820753410a3e9abf0a0c9fde33e97", - "sha256:b72bef6013de67c7fa181250bc2c2e089a994d259c09ca95a9771f2f97e29ed1" + "sha256:16b2f7e81d89c2a5a914d25bdbddd3932c31a6b510db886c3ce0764a195c0ee7", + "sha256:9aa21a1f7827df4209e242ec4fc2293af5940ec730cde46ea80f66ed97bfc808" ], "index": "pypi", - "version": "==2.2.1" + "version": "==2.2.2" }, "tqdm": { "hashes": [ - "sha256:d385c95361699e5cf7622485d9b9eae2d4864b21cd5a2374a9c381ffed701021", - "sha256:e22977e3ebe961f72362f6ddfb9197cc531c9737aaf5f607ef09740c849ecd05" + "sha256:166a82cdea964ae45528e0cc89436255ff2be73dc848bdf239f13c501cae5dc7", + "sha256:9036904496bd2afacf836a6f206c5a766ce11d3e9319d54a4e794c0f34b111dc" ], - "version": "==4.31.1" + "version": "==4.41.0" }, "urllib3": { "hashes": [ - "sha256:2393a695cd12afedd0dcb26fe5d50d0cf248e5a66f75dbd89a3d4eb333a61af4", - "sha256:a637e5fae88995b256e3409dc4d52c2e2e0ba32c42a6365fee8bbd2238de3cfb" + "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", + "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" ], - "version": "==1.24.3" + "version": "==1.25.7" }, "wasabi": { "hashes": [ - "sha256:b4fbee9dd0c8f5cff6554c0463c565e2d52b7c844d7eccb477d29a6ff8567750", - "sha256:f92c83e728bf1db6dc859ffc861afa328d2da8ef0c7a19300e5fb1bd5762b277" + "sha256:64bddd8ccd67c8327853401a6f579a926c84639f84ad5cabc3be3baa2b111f22", + "sha256:bc882670a317be1a88e85cd7fc41bd99c1857bfc085090edd11bbff6c4f35788" + ], + "version": "==0.5.0" + }, + "zipp": { + "hashes": [ + "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", + "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" ], - "version": "==0.2.2" + "version": "==0.6.0" } }, "develop": { "astroid": { "hashes": [ - "sha256:6560e1e1749f68c64a4b5dee4e091fce798d2f0d84ebe638cf0e0585a343acf4", - "sha256:b65db1bbaac9f9f4d190199bb8680af6f6f84fd3769a5ea883df8a91fe68b4c4" + "sha256:71ea07f44df9568a75d0f354c49143a4575d90645e9fead6dfb52c26a85ed13a", + "sha256:840947ebfa8b58f318d42301cf8c0a20fd794a33b61cc4638e28e9e61ba32f42" ], - "version": "==2.2.5" + "version": "==2.3.3" }, "atomicwrites": { "hashes": [ "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" ], + "markers": "sys_platform == 'win32'", "version": "==1.3.0" }, "attrs": { "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" ], - "version": "==19.1.0" + "version": "==19.3.0" + }, + "colorama": { + "hashes": [ + "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff", + "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1" + ], + "version": "==0.4.3" }, "entrypoints": { "hashes": [ @@ -477,52 +525,52 @@ }, "flake8": { "hashes": [ - "sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661", - "sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8" + "sha256:45681a117ecc81e870cbf1262835ae4af5e7a8b08e40b944a8a6e6b895914cfb", + "sha256:49356e766643ad15072a789a20915d3c91dc89fd313ccd71802303fd67e4deca" ], "index": "pypi", - "version": "==3.7.7" + "version": "==3.7.9" + }, + "importlib-metadata": { + "hashes": [ + "sha256:073a852570f92da5f744a3472af1b61e28e9f78ccf0c9117658dc32b15de7b45", + "sha256:d95141fbfa7ef2ec65cfd945e2af7e5a6ddbd7c8d9a25e66ff3be8e3daf9f60f" + ], + "markers": "python_version < '3.8'", + "version": "==1.3.0" }, "isort": { "hashes": [ - "sha256:1349c6f7c2a0f7539f5f2ace51a9a8e4a37086ce4de6f78f5f53fb041d0a3cd5", - "sha256:f09911f6eb114e5592abe635aded8bf3d2c3144ebcfcaf81ee32e7af7b7d1870" + "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1", + "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd" ], - "version": "==4.3.18" + "version": "==4.3.21" }, "lazy-object-proxy": { "hashes": [ - "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", - "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", - "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", - "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", - "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", - "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", - "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", - "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", - "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", - "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", - "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", - "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", - "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", - "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", - "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", - "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", - "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", - "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", - "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", - "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", - "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", - "sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", - "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", - "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", - "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", - "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", - "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", - "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", - "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" - ], - "version": "==1.3.1" + "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", + "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", + "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", + "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", + "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", + "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", + "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", + "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", + "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", + "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", + "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", + "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", + "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", + "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", + "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", + "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", + "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", + "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", + "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", + "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", + "sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" + ], + "version": "==1.4.3" }, "mccabe": { "hashes": [ @@ -533,25 +581,31 @@ }, "more-itertools": { "hashes": [ - "sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7", - "sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a" + "sha256:b84b238cce0d9adad5ed87e745778d20a3f8487d0f0cb8b8a586816c7496458d", + "sha256:c833ef592a0324bcc6a60e48440da07645063c453880c9477ceb22490aec1564" ], - "markers": "python_version > '2.7'", - "version": "==7.0.0" + "version": "==8.0.2" + }, + "packaging": { + "hashes": [ + "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47", + "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108" + ], + "version": "==19.2" }, "pluggy": { "hashes": [ - "sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f", - "sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746" + "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", + "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" ], - "version": "==0.9.0" + "version": "==0.13.1" }, "py": { "hashes": [ - "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", - "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + "sha256:5e27081401262157467ad6e7f851b7aa402c5852dbcb3dae06768434de5752aa", + "sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0" ], - "version": "==1.8.0" + "version": "==1.8.1" }, "pycodestyle": { "hashes": [ @@ -575,51 +629,73 @@ "index": "pypi", "version": "==2.3.1" }, + "pyparsing": { + "hashes": [ + "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f", + "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec" + ], + "version": "==2.4.6" + }, "pytest": { "hashes": [ - "sha256:3773f4c235918987d51daf1db66d51c99fac654c81d6f2f709a046ab446d5e5d", - "sha256:b7802283b70ca24d7119b32915efa7c409982f59913c1a6c0640aacf118b95f5" + "sha256:6b571215b5a790f9b41f19f3531c53a45cf6bb8ef2988bc1ff9afb38270b25fa", + "sha256:e41d489ff43948babd0fad7ad5e49b8735d5d55e26628a58673c39ff61d95de4" ], "index": "pypi", - "version": "==4.4.1" + "version": "==5.3.2" }, "six": { "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", + "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" ], - "version": "==1.12.0" + "version": "==1.13.0" }, "typed-ast": { "hashes": [ - "sha256:132eae51d6ef3ff4a8c47c393a4ef5ebf0d1aecc96880eb5d6c8ceab7017cc9b", - "sha256:18141c1484ab8784006c839be8b985cfc82a2e9725837b0ecfa0203f71c4e39d", - "sha256:2baf617f5bbbfe73fd8846463f5aeafc912b5ee247f410700245d68525ec584a", - "sha256:3d90063f2cbbe39177e9b4d888e45777012652d6110156845b828908c51ae462", - "sha256:4304b2218b842d610aa1a1d87e1dc9559597969acc62ce717ee4dfeaa44d7eee", - "sha256:4983ede548ffc3541bae49a82675996497348e55bafd1554dc4e4a5d6eda541a", - "sha256:5315f4509c1476718a4825f45a203b82d7fdf2a6f5f0c8f166435975b1c9f7d4", - "sha256:6cdfb1b49d5345f7c2b90d638822d16ba62dc82f7616e9b4caa10b72f3f16649", - "sha256:7b325f12635598c604690efd7a0197d0b94b7d7778498e76e0710cd582fd1c7a", - "sha256:8d3b0e3b8626615826f9a626548057c5275a9733512b137984a68ba1598d3d2f", - "sha256:8f8631160c79f53081bd23446525db0bc4c5616f78d04021e6e434b286493fd7", - "sha256:912de10965f3dc89da23936f1cc4ed60764f712e5fa603a09dd904f88c996760", - "sha256:b010c07b975fe853c65d7bbe9d4ac62f1c69086750a574f6292597763781ba18", - "sha256:c908c10505904c48081a5415a1e295d8403e353e0c14c42b6d67f8f97fae6616", - "sha256:c94dd3807c0c0610f7c76f078119f4ea48235a953512752b9175f9f98f5ae2bd", - "sha256:ce65dee7594a84c466e79d7fb7d3303e7295d16a83c22c7c4037071b059e2c21", - "sha256:eaa9cfcb221a8a4c2889be6f93da141ac777eb8819f077e1d09fb12d00a09a93", - "sha256:f3376bc31bad66d46d44b4e6522c5c21976bf9bca4ef5987bb2bf727f4506cbb", - "sha256:f9202fa138544e13a4ec1a6792c35834250a85958fde1251b6a22e07d1260ae7" - ], - "markers": "implementation_name == 'cpython'", - "version": "==1.3.5" + "sha256:1170afa46a3799e18b4c977777ce137bb53c7485379d9706af8a59f2ea1aa161", + "sha256:18511a0b3e7922276346bcb47e2ef9f38fb90fd31cb9223eed42c85d1312344e", + "sha256:262c247a82d005e43b5b7f69aff746370538e176131c32dda9cb0f324d27141e", + "sha256:2b907eb046d049bcd9892e3076c7a6456c93a25bebfe554e931620c90e6a25b0", + "sha256:354c16e5babd09f5cb0ee000d54cfa38401d8b8891eefa878ac772f827181a3c", + "sha256:48e5b1e71f25cfdef98b013263a88d7145879fbb2d5185f2a0c79fa7ebbeae47", + "sha256:4e0b70c6fc4d010f8107726af5fd37921b666f5b31d9331f0bd24ad9a088e631", + "sha256:630968c5cdee51a11c05a30453f8cd65e0cc1d2ad0d9192819df9978984529f4", + "sha256:66480f95b8167c9c5c5c87f32cf437d585937970f3fc24386f313a4c97b44e34", + "sha256:71211d26ffd12d63a83e079ff258ac9d56a1376a25bc80b1cdcdf601b855b90b", + "sha256:7954560051331d003b4e2b3eb822d9dd2e376fa4f6d98fee32f452f52dd6ebb2", + "sha256:838997f4310012cf2e1ad3803bce2f3402e9ffb71ded61b5ee22617b3a7f6b6e", + "sha256:95bd11af7eafc16e829af2d3df510cecfd4387f6453355188342c3e79a2ec87a", + "sha256:bc6c7d3fa1325a0c6613512a093bc2a2a15aeec350451cbdf9e1d4bffe3e3233", + "sha256:cc34a6f5b426748a507dd5d1de4c1978f2eb5626d51326e43280941206c209e1", + "sha256:d755f03c1e4a51e9b24d899561fec4ccaf51f210d52abdf8c07ee2849b212a36", + "sha256:d7c45933b1bdfaf9f36c579671fec15d25b06c8398f113dab64c18ed1adda01d", + "sha256:d896919306dd0aa22d0132f62a1b78d11aaf4c9fc5b3410d3c666b818191630a", + "sha256:fdc1c9bbf79510b76408840e009ed65958feba92a88833cdceecff93ae8fff66", + "sha256:ffde2fbfad571af120fcbfbbc61c72469e72f550d676c3342492a9dfdefb8f12" + ], + "markers": "implementation_name == 'cpython' and python_version < '3.8'", + "version": "==1.4.0" + }, + "wcwidth": { + "hashes": [ + "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "version": "==0.1.7" }, "wrapt": { "hashes": [ - "sha256:4aea003270831cceb8a90ff27c4031da6ead7ec1886023b80ce0dfe0adf61533" + "sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1" + ], + "version": "==1.11.2" + }, + "zipp": { + "hashes": [ + "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", + "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" ], - "version": "==1.11.1" + "version": "==0.6.0" } } } diff --git a/presidio-analyzer/analyzer/__init__.py b/presidio-analyzer/analyzer/__init__.py index eb2c386d3..b52320d5f 100644 --- a/presidio-analyzer/analyzer/__init__.py +++ b/presidio-analyzer/analyzer/__init__.py @@ -1,19 +1,23 @@ -import os -import sys +import os # noqa +import sys # noqa # pylint: disable=unused-import,wrong-import-position -# bug #602: Fix imports issue in python sys.path.append(os.path.dirname(os.path.dirname( - os.path.abspath(__file__))) + "/analyzer") + os.path.abspath(__file__))) + "/analyzer") # noqa -from analyzer.analysis_explanation import AnalysisExplanation # noqa -from analyzer.pattern import Pattern # noqa: F401 -from analyzer.entity_recognizer import EntityRecognizer # noqa: F401 -from analyzer.local_recognizer import LocalRecognizer # noqa: F401 -from analyzer.recognizer_result import RecognizerResult # noqa: F401 -from analyzer.pattern_recognizer import PatternRecognizer # noqa: F401 -from analyzer.remote_recognizer import RemoteRecognizer # noqa: F401 -from analyzer.recognizer_registry.recognizer_registry import ( # noqa: F401 - RecognizerRegistry -) -from analyzer.analyzer_engine import AnalyzerEngine # noqa +from analyzer.presidio_logger import PresidioLogger +from analyzer.analysis_explanation import AnalysisExplanation +from analyzer.pattern import Pattern +from analyzer.entity_recognizer import EntityRecognizer +from analyzer.local_recognizer import LocalRecognizer +from analyzer.recognizer_result import RecognizerResult +from analyzer.pattern_recognizer import PatternRecognizer +from analyzer.remote_recognizer import RemoteRecognizer +from analyzer.recognizer_registry.recognizer_registry import RecognizerRegistry +from analyzer.analyzer_engine import AnalyzerEngine + + +__all__ = ['PresidioLogger', 'AnalysisExplanation', 'Pattern', + 'EntityRecognizer', 'LocalRecognizer', 'RecognizerResult', + 'PatternRecognizer', 'RemoteRecognizer', 'RecognizerRegistry', + 'AnalyzerEngine'] diff --git a/presidio-analyzer/analyzer/__main__.py b/presidio-analyzer/analyzer/__main__.py index 510f00217..202a345a6 100644 --- a/presidio-analyzer/analyzer/__main__.py +++ b/presidio-analyzer/analyzer/__main__.py @@ -4,9 +4,9 @@ import analyze_pb2 import analyze_pb2_grpc from concurrent import futures -import time -from os import sys, path import os +import sys +import time from google.protobuf.json_format import MessageToJson from knack import CLI from knack.arguments import ArgumentsContext @@ -14,12 +14,14 @@ from knack.help import CLIHelp from knack.help_files import helps -# bug #602: Fix imports issue in python -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from analyzer_engine import AnalyzerEngine # noqa from recognizer_registry.recognizer_registry import RecognizerRegistry # noqa from nlp_engine.spacy_nlp_engine import SpacyNlpEngine # noqa +from presidio_logger import PresidioLogger # noqa + +logging.getLogger().setLevel("INFO") WELCOME_MESSAGE = r""" @@ -47,9 +49,7 @@ license is AC432223" --fields "PERSON" "US_DRIVER_LICENSE" """ -loglevel = os.environ.get("LOG_LEVEL", "INFO") -logging.basicConfig( - format='%(asctime)s:%(levelname)s:%(message)s', level=loglevel) +logger = PresidioLogger() class PresidioCLIHelp(CLIHelp): @@ -63,24 +63,36 @@ def __init__(self, cli_ctx=None): def serve_command_handler(enable_trace_pii, env_grpc_port=False, grpc_port=3000): - + logger.info("Starting GRPC server") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + logger.info("GRPC started") + logger.info("Creating RecognizerRegistry") registry = RecognizerRegistry() + logger.info("RecognizerRegistry created") + logger.info("Creating SpacyNlpEngine") nlp_engine = SpacyNlpEngine() + logger.info("SpacyNlpEngine created") + analyze_pb2_grpc.add_AnalyzeServiceServicer_to_server( AnalyzerEngine(registry=registry, nlp_engine=nlp_engine, enable_trace_pii=enable_trace_pii), server) + logger.info("Added AnalyzeServiceServicer to server") + if env_grpc_port: + logger.info("Getting port {}".format(env_grpc_port)) port = os.environ.get('GRPC_PORT') if port is not None or port != '': grpc_port = int(port) + else: + logger.info("env_grpc_port not provided. " + "Using grpc_port {}".format(grpc_port)) server.add_insecure_port('[::]:' + str(grpc_port)) - logging.info("Starting GRPC listener at port %d", grpc_port) + logger.info("Starting GRPC listener at port {}".format(grpc_port)) server.start() try: while True: diff --git a/presidio-analyzer/analyzer/analyzer_engine.py b/presidio-analyzer/analyzer/analyzer_engine.py index 7157bf14c..ad6c2a025 100644 --- a/presidio-analyzer/analyzer/analyzer_engine.py +++ b/presidio-analyzer/analyzer/analyzer_engine.py @@ -5,11 +5,11 @@ import analyze_pb2_grpc import common_pb2 -from analyzer.logger import Logger +from analyzer import PresidioLogger from analyzer.app_tracer import AppTracer DEFAULT_LANGUAGE = "en" -logger = Logger() +logger = PresidioLogger("presidio") class AnalyzerEngine(analyze_pb2_grpc.AnalyzeServiceServicer): @@ -31,9 +31,13 @@ def __init__(self, registry=None, nlp_engine=None, for detected entities to be returned """ if not nlp_engine: + logger.info("nlp_engine not provided. Creating new " + "SpacyNlpEngine instance") from analyzer.nlp_engine import SpacyNlpEngine nlp_engine = SpacyNlpEngine() if not registry: + logger.info("Recognizer registry not provided. " + "Creating default RecognizerRegistry instance") from analyzer import RecognizerRegistry registry = RecognizerRegistry() if not app_tracer: @@ -98,7 +102,7 @@ def Apply(self, request, context): def __remove_duplicates(results): """ Removes each result which has a span contained in a - result's span with ahigher score + result's span with a higher score :param results: List[RecognizerResult] :return: List[RecognizerResult] """ @@ -117,9 +121,9 @@ def __remove_duplicates(results): for filtered in filtered_results: # If result is equal to or substring of # one of the other results - if result.start >= filtered.start \ - and result.end <= filtered.end \ - and result.entity_type == filtered.entity_type: + + if result.contained_in(filtered) and \ + result.entity_type == filtered.entity_type: valid_result = False break diff --git a/presidio-analyzer/analyzer/app_tracer.py b/presidio-analyzer/analyzer/app_tracer.py index 0154eaeaf..ce025a0d8 100644 --- a/presidio-analyzer/analyzer/app_tracer.py +++ b/presidio-analyzer/analyzer/app_tracer.py @@ -1,4 +1,4 @@ -from analyzer.logger import Logger +from analyzer import PresidioLogger class AppTracer: @@ -8,7 +8,7 @@ class AppTracer: This can be useful for analyzing the detection accuracy of the system.""" def __init__(self, enabled=True): - self.logger = Logger('Interpretability') + self.logger = PresidioLogger('Interpretability') self.logger.set_level("INFO") self.enabled = enabled diff --git a/presidio-analyzer/analyzer/entity_recognizer.py b/presidio-analyzer/analyzer/entity_recognizer.py index 49a06d5dc..4bf0a983f 100644 --- a/presidio-analyzer/analyzer/entity_recognizer.py +++ b/presidio-analyzer/analyzer/entity_recognizer.py @@ -1,7 +1,7 @@ -from abc import abstractmethod import copy +from abc import abstractmethod -from analyzer.logger import Logger +from analyzer import PresidioLogger class EntityRecognizer: @@ -36,7 +36,7 @@ def __init__(self, supported_entities, name=None, supported_language="en", self.version = version self.is_loaded = False - self.logger = Logger() + self.logger = PresidioLogger() self.load() self.logger.info("Loaded recognizer: %s", self.name) self.is_loaded = True diff --git a/presidio-analyzer/analyzer/nlp_engine/spacy_nlp_engine.py b/presidio-analyzer/analyzer/nlp_engine/spacy_nlp_engine.py index f30685a1b..2321e9075 100644 --- a/presidio-analyzer/analyzer/nlp_engine/spacy_nlp_engine.py +++ b/presidio-analyzer/analyzer/nlp_engine/spacy_nlp_engine.py @@ -1,9 +1,9 @@ import spacy -from spacy.cli import download -from analyzer.logger import Logger +from analyzer import PresidioLogger from analyzer.nlp_engine import NlpArtifacts, NlpEngine -logger = Logger() + +logger = PresidioLogger() class SpacyNlpEngine(NlpEngine): @@ -14,13 +14,14 @@ class SpacyNlpEngine(NlpEngine): """ def __init__(self): - logger.info("Loading NLP model...") + logger.info("Loading NLP model: spaCy en_core_web_lg") - # Download model lazily if it wasn't previously installed - download('en_core_web_lg') self.nlp = {"en": spacy.load("en_core_web_lg", disable=['parser', 'tagger'])} + logger.info("Printing spaCy model and package details:" + "\n\n {}\n\n".format(spacy.info("en_core_web_lg"))) + def process_text(self, text, language): """ Execute the SpaCy NLP pipeline on the given text and language diff --git a/presidio-analyzer/analyzer/predefined_recognizers/__init__.py b/presidio-analyzer/analyzer/predefined_recognizers/__init__.py index 4306363e5..12560e530 100644 --- a/presidio-analyzer/analyzer/predefined_recognizers/__init__.py +++ b/presidio-analyzer/analyzer/predefined_recognizers/__init__.py @@ -1,15 +1,29 @@ -# pylint: disable=unused-import -from .credit_card_recognizer import CreditCardRecognizer # noqa: F401 -from .spacy_recognizer import SpacyRecognizer # noqa: F401 -from .crypto_recognizer import CryptoRecognizer # noqa: F401 -from .domain_recognizer import DomainRecognizer # noqa: F401 -from .email_recognizer import EmailRecognizer # noqa: F401 -from .iban_recognizer import IbanRecognizer # noqa: F401 -from .ip_recognizer import IpRecognizer # noqa: F401 -from .uk_nhs_recognizer import NhsRecognizer # noqa: F401 -from .us_bank_recognizer import UsBankRecognizer # noqa: F401 -from .us_driver_license_recognizer import UsLicenseRecognizer # noqa: F401 -from .us_itin_recognizer import UsItinRecognizer # noqa: F401 -from .us_passport_recognizer import UsPassportRecognizer # noqa: F401 -from .us_phone_recognizer import UsPhoneRecognizer # noqa: F401 -from .us_ssn_recognizer import UsSsnRecognizer # noqa: F401 +from .credit_card_recognizer import CreditCardRecognizer +from .crypto_recognizer import CryptoRecognizer +from .domain_recognizer import DomainRecognizer +from .email_recognizer import EmailRecognizer +from .iban_recognizer import IbanRecognizer +from .ip_recognizer import IpRecognizer +from .spacy_recognizer import SpacyRecognizer +from .uk_nhs_recognizer import NhsRecognizer +from .us_bank_recognizer import UsBankRecognizer +from .us_driver_license_recognizer import UsLicenseRecognizer +from .us_itin_recognizer import UsItinRecognizer +from .us_passport_recognizer import UsPassportRecognizer +from .us_phone_recognizer import UsPhoneRecognizer +from .us_ssn_recognizer import UsSsnRecognizer + +__all__ = ["CreditCardRecognizer", + "CryptoRecognizer", + "DomainRecognizer", + "EmailRecognizer", + "IbanRecognizer", + "IpRecognizer", + "SpacyRecognizer", + "NhsRecognizer", + "UsBankRecognizer", + "UsLicenseRecognizer", + "UsItinRecognizer", + "UsPassportRecognizer", + "UsPhoneRecognizer", + "UsSsnRecognizer"] diff --git a/presidio-analyzer/analyzer/predefined_recognizers/domain_recognizer.py b/presidio-analyzer/analyzer/predefined_recognizers/domain_recognizer.py index 1a578c330..419b1fb7c 100644 --- a/presidio-analyzer/analyzer/predefined_recognizers/domain_recognizer.py +++ b/presidio-analyzer/analyzer/predefined_recognizers/domain_recognizer.py @@ -1,7 +1,6 @@ import tldextract -from analyzer import Pattern -from analyzer import PatternRecognizer +from analyzer import Pattern, PatternRecognizer # pylint: disable=line-too-long REGEX = r'\b(((([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,86}[a-zA-Z0-9]))\.(([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,73}[a-zA-Z0-9]))\.(([a-zA-Z0-9]{2,12}\.[a-zA-Z0-9]{2,12})|([a-zA-Z0-9]{2,25})))|((([a-zA-Z0-9])|([a-zA-Z0-9][a-zA-Z0-9\-]{0,162}[a-zA-Z0-9]))\.(([a-zA-Z0-9]{2,12}\.[a-zA-Z0-9]{2,12})|([a-zA-Z0-9]{2,25}))))\b' # noqa: E501' # noqa: E501 diff --git a/presidio-analyzer/analyzer/predefined_recognizers/email_recognizer.py b/presidio-analyzer/analyzer/predefined_recognizers/email_recognizer.py index 5925608a4..64128c78a 100644 --- a/presidio-analyzer/analyzer/predefined_recognizers/email_recognizer.py +++ b/presidio-analyzer/analyzer/predefined_recognizers/email_recognizer.py @@ -1,7 +1,6 @@ import tldextract -from analyzer import Pattern -from analyzer import PatternRecognizer +from analyzer import Pattern, PatternRecognizer # pylint: disable=line-too-long REGEX = r"\b((([!#$%&'*+\-/=?^_`{|}~\w])|([!#$%&'*+\-/=?^_`{|}~\w][!#$%&'*+\-/=?^_`{|}~\.\w]{0,}[!#$%&'*+\-/=?^_`{|}~\w]))[@]\w+([-.]\w+)*\.\w+([-.]\w+)*)\b" # noqa: E501 diff --git a/presidio-analyzer/analyzer/predefined_recognizers/us_bank_recognizer.py b/presidio-analyzer/analyzer/predefined_recognizers/us_bank_recognizer.py index f16d07ea7..222cdb487 100644 --- a/presidio-analyzer/analyzer/predefined_recognizers/us_bank_recognizer.py +++ b/presidio-analyzer/analyzer/predefined_recognizers/us_bank_recognizer.py @@ -9,11 +9,11 @@ "bank" # Task #603: Support keyphrases: change to "checking account" # as part of keyphrase change - "checking", + "check", "account", "account#", "acct", - "saving", + "save", "debit" ] diff --git a/presidio-analyzer/analyzer/predefined_recognizers/us_passport_recognizer.py b/presidio-analyzer/analyzer/predefined_recognizers/us_passport_recognizer.py index 3b9f2397d..d5553849f 100644 --- a/presidio-analyzer/analyzer/predefined_recognizers/us_passport_recognizer.py +++ b/presidio-analyzer/analyzer/predefined_recognizers/us_passport_recognizer.py @@ -5,8 +5,9 @@ # Weak pattern: all passport numbers are a weak match, e.g., 14019033 VERY_WEAK_REGEX = r'(\b[0-9]{9}\b)' + CONTEXT = [ - "us", "united", "states", "passport", "number", "passport#", "travel", + "us", "united", "states", "passport", "passport#", "travel", "document" ] diff --git a/presidio-analyzer/analyzer/logger.py b/presidio-analyzer/analyzer/presidio_logger.py similarity index 94% rename from presidio-analyzer/analyzer/logger.py rename to presidio-analyzer/analyzer/presidio_logger.py index 12598ca61..d344d34b3 100644 --- a/presidio-analyzer/analyzer/logger.py +++ b/presidio-analyzer/analyzer/presidio_logger.py @@ -1,10 +1,11 @@ import logging import os +import sys -class Logger: +class PresidioLogger: """A wrapper class for logger""" - def __init__(self, logger_name=None): + def __init__(self, logger_name="presidio"): if logger_name: logger = logging.getLogger(logger_name) else: @@ -12,7 +13,7 @@ def __init__(self, logger_name=None): if not logger.handlers: loglevel = os.environ.get("LOG_LEVEL", "INFO") - ch = logging.StreamHandler() + ch = logging.StreamHandler(sys.stdout) formatter = logging.Formatter( '[%(asctime)s][%(name)s][%(levelname)s]%(message)s') ch.setFormatter(formatter) diff --git a/presidio-analyzer/analyzer/recognizer_registry/recognizers_store_api.py b/presidio-analyzer/analyzer/recognizer_registry/recognizers_store_api.py index 4219ca2e1..17b3be4e8 100644 --- a/presidio-analyzer/analyzer/recognizer_registry/recognizers_store_api.py +++ b/presidio-analyzer/analyzer/recognizer_registry/recognizers_store_api.py @@ -45,7 +45,7 @@ def get_latest_hash(self): if not last_hash: logging.info("Recognizers hash was not found in store") else: - logging.info("Latest hash found in store is: %s", last_hash) + logging.info("Latest hash found in store is: %s", str(last_hash)) return last_hash def get_all_recognizers(self): diff --git a/presidio-analyzer/analyzer/recognizer_result.py b/presidio-analyzer/analyzer/recognizer_result.py index 6261451c1..94cc74da6 100644 --- a/presidio-analyzer/analyzer/recognizer_result.py +++ b/presidio-analyzer/analyzer/recognizer_result.py @@ -27,3 +27,38 @@ def append_analysis_explenation_text(self, text): def to_json(self): return str(self.__dict__) + + def __str__(self): + return "type: {}, " \ + "start: {}, " \ + "end: {}, " \ + "score: {}".format(self.entity_type, + self.start, + self.end, + self.score) + + def __repr(self): + return self.__str__() + + def intersects(self, other): + """ + Checks if self intersects with a different RecognizerResult + :return: If interesecting, returns the number of + intersecting characters. + If not, returns 0 + """ + + # if they do not overlap the intersection is 0 + if self.end < other.start or other.end < self.start: + return 0 + + # otherwise the intersection is min(end) - max(start) + return min(self.end, other.end) - max(self.start, other.start) + + def contained_in(self, other): + """ + Checks if self is contained in a different RecognizerResult + :return: true if contained + """ + + return self.start >= other.start and self.end <= other.end diff --git a/presidio-analyzer/setup.py b/presidio-analyzer/setup.py index 670d4d66b..bb2e7889b 100644 --- a/presidio-analyzer/setup.py +++ b/presidio-analyzer/setup.py @@ -22,7 +22,7 @@ tests_require=['pytest', 'flake8', 'pylint==2.3.1'], install_requires=[ 'cython==0.29.10', - 'spacy==2.1.4', + 'spacy==2.2.3', 'regex==2019.6.8', 'grpcio==1.21.1', 'protobuf==3.8.0', diff --git a/presidio-analyzer/tests/__init__.py b/presidio-analyzer/tests/__init__.py index 572ca2a79..559f283ca 100644 --- a/presidio-analyzer/tests/__init__.py +++ b/presidio-analyzer/tests/__init__.py @@ -1,6 +1,12 @@ import os import sys -# bug #602: Fix imports issue in python +from analyzer.nlp_engine import SpacyNlpEngine + sys.path.append(os.path.dirname(os.path.dirname( os.path.abspath(__file__))) + "/tests") + +from .assertions import assert_result, assert_result_within_score_range + +print("Creating tests SpacyNlpEngine which starts the spaCy model") +TESTS_NLP_ENGINE = SpacyNlpEngine() diff --git a/presidio-analyzer/tests/test_analyzer_engine.py b/presidio-analyzer/tests/test_analyzer_engine.py index 5e79a1265..2c28bb59f 100644 --- a/presidio-analyzer/tests/test_analyzer_engine.py +++ b/presidio-analyzer/tests/test_analyzer_engine.py @@ -1,25 +1,24 @@ +import hashlib from unittest import TestCase -from analyzer.entity_recognizer import EntityRecognizer -import os -import hashlib import pytest -from assertions import assert_result -from analyzer.analyze_pb2 import AnalyzeRequest - from analyzer import AnalyzerEngine, PatternRecognizer, Pattern, \ RecognizerResult, RecognizerRegistry, AnalysisExplanation +from analyzer import PresidioLogger +from analyzer.analyze_pb2 import AnalyzeRequest +from analyzer.entity_recognizer import EntityRecognizer +from analyzer.nlp_engine import NlpArtifacts from analyzer.predefined_recognizers import CreditCardRecognizer, \ - UsPhoneRecognizer, DomainRecognizer, UsItinRecognizer, \ - UsLicenseRecognizer, UsBankRecognizer, UsPassportRecognizer + UsPhoneRecognizer, DomainRecognizer from analyzer.recognizer_registry.recognizers_store_api \ import RecognizerStoreApi # noqa: F401 -from analyzer.nlp_engine import SpacyNlpEngine, NlpArtifacts -from analyzer.predefined_recognizers import IpRecognizer, UsSsnRecognizer +from tests import assert_result, TESTS_NLP_ENGINE from tests.mocks import MockNlpEngine from tests.mocks.app_tracer_mock import AppTracerMock +logger = PresidioLogger() + class RecognizerStoreApiMock(RecognizerStoreApi): """ @@ -84,7 +83,7 @@ def load_recognizers(self, path): DomainRecognizer()]) -loaded_spacy_nlp_engine = SpacyNlpEngine() +loaded_spacy_nlp_engine = TESTS_NLP_ENGINE class TestAnalyzerEngine(TestCase): @@ -351,7 +350,8 @@ def test_when_analyze_then_apptracer_has_value(self): entities = ["CREDIT_CARD", "PHONE_NUMBER", "PERSON"] analyzer_engine_with_spacy = AnalyzerEngine(self.loaded_registry, app_tracer=self.app_tracer, - enable_trace_pii=True) + enable_trace_pii=True, + nlp_engine=TESTS_NLP_ENGINE) results = analyzer_engine_with_spacy.analyze(correlation_id=self.unit_test_guid, text=text, entities=entities, @@ -432,34 +432,61 @@ def test_when_default_threshold_is_zero_all_results_pass(self): assert len(results) == 2 def test_demo_text(self): - text = "Here are a few examples of entities we currently support: \n" \ - "Credit card: 4095-2609-9393-4932 \n" \ - "Crypto wallet id: 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ \n" \ - "DateTime: September 18 n" \ - "Domain: microsoft.com \n" \ - "Email address: test@presidio.site \n" \ - "IBAN code: IL150120690000003111111 \n" \ - "IP: 192.168.0.1 i\n" \ - "Person name: David Johnson\n" \ - "Bank account: 2854567876542\n" \ - "Driver license number: H12234567\n" \ - "Passport: 912803456\n" \ - "Phone number: (212) 555-1234.\n" \ - "Social security number: 078-05-1120\n" \ + text = "Here are a few examples sentences we currently support:\n\n" \ + "Hello, my name is David Johnson and I live in Maine.\n" \ + "My credit card number is 4095-2609-9393-4932 and my " \ + "Crypto wallet id is 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ.\n\n" \ + "On September 18 I visited microsoft.com and sent an " \ + "email to test@microsoft.com, from the IP 192.168.0.1.\n\n" \ + "My passport: 991280345 and my phone number: (212) 555-1234.\n\n" \ + "Please transfer using this IBAN IL150120690000003111111.\n\n" \ + "Can you please check the status on bank account 954567876544 " \ + "in PresidiBank?\n\n" \ + "" \ + "Kate's social security number is 078-05-1120. " \ + "Her driver license? it is 9234567B.\n\n" \ "" \ - "This project welcomes contributions and suggestions. Most contributions require you to agree to a " \ - "Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us " \ - "the rights to use your contribution. For details, visit https://cla.microsoft.com.\n" \ - "When you submit a pull request, a CLA-bot will automatically determine whether you need to provide " \ - "a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions " \ - "provided by the bot. You will only need to do this once across all repos using our CLA.\n\n" \ - "This project has adopted the Microsoft Open Source Code of Conduct. For more information see the " \ - "Code of Conduct FAQ or contact opencode@microsoft.com with any additional questions or comments." + "This project welcomes contributions and suggestions.\n" \ + "Most contributions require you to agree to a " \ + "Contributor License Agreement (CLA) declaring " \ + "that you have the right to, and actually do, " \ + "grant us the rights to use your contribution. " \ + "For details, visit https://cla.microsoft.com " \ + "When you submit a pull request, " \ + "a CLA-bot will automatically determine whether " \ + "you need to provide a CLA and decorate the PR " \ + "appropriately (e.g., label, comment).\n" \ + "Simply follow the instructions provided by the bot. " \ + "You will only need to do this once across all repos using our CLA.\n" \ + "This project has adopted the Microsoft Open Source Code of Conduct.\n" \ + "For more information see the Code of Conduct FAQ or " \ + "contact opencode@microsoft.com with any additional questions or comments." language = "en" - analyzer_engine = AnalyzerEngine(default_score_threshold=0.6) + analyzer_engine = AnalyzerEngine(default_score_threshold=0.35, nlp_engine=loaded_spacy_nlp_engine) results = analyzer_engine.analyze(correlation_id=self.unit_test_guid, text=text, entities=None, language=language, all_fields=True) - - assert len(results) == 15 + for result in results: + logger.info("Entity = {}, Text = {}, Score={}, Start={}, End={}".format(result.entity_type, + text[result.start:result.end], + result.score, + result.start, result.end)) + detected_entities = [result.entity_type for result in results] + + assert len([entity for entity in detected_entities if entity == "CREDIT_CARD"]) == 1 + assert len([entity for entity in detected_entities if entity == "CRYPTO"]) == 1 + assert len([entity for entity in detected_entities if entity == "DATE_TIME"]) == 1 + assert len([entity for entity in detected_entities if entity == "DOMAIN_NAME"]) == 4 + assert len([entity for entity in detected_entities if entity == "EMAIL_ADDRESS"]) == 2 + assert len([entity for entity in detected_entities if entity == "IBAN_CODE"]) == 1 + assert len([entity for entity in detected_entities if entity == "IP_ADDRESS"]) == 1 + assert len([entity for entity in detected_entities if entity == "LOCATION"]) == 1 + assert len([entity for entity in detected_entities if entity == "PERSON"]) == 2 + assert len([entity for entity in detected_entities if entity == "PHONE_NUMBER"]) == 1 + assert len([entity for entity in detected_entities if entity == "US_BANK_NUMBER"]) == 1 + assert len([entity for entity in detected_entities if entity == "US_DRIVER_LICENSE"]) == 1 + assert len([entity for entity in detected_entities if entity == "US_PASSPORT"]) == 1 + assert len([entity for entity in detected_entities if entity == "US_SSN"]) == 1 + + assert len(results) == 19 diff --git a/presidio-analyzer/tests/test_context_support.py b/presidio-analyzer/tests/test_context_support.py index 8f2c311a5..a3936b89b 100644 --- a/presidio-analyzer/tests/test_context_support.py +++ b/presidio-analyzer/tests/test_context_support.py @@ -8,7 +8,8 @@ UsPhoneRecognizer, DomainRecognizer, UsItinRecognizer, \ UsLicenseRecognizer, UsBankRecognizer, UsPassportRecognizer, \ IpRecognizer, UsSsnRecognizer -from analyzer.nlp_engine import SpacyNlpEngine, NlpArtifacts +from analyzer.nlp_engine import NlpArtifacts +from tests import TESTS_NLP_ENGINE ip_recognizer = IpRecognizer() us_ssn_recognizer = UsSsnRecognizer() @@ -77,7 +78,7 @@ def __init__(self, *args, **kwargs): # Context tests def test_text_with_context_improves_score(self): - nlp_engine = SpacyNlpEngine() + nlp_engine = TESTS_NLP_ENGINE mock_nlp_artifacts = NlpArtifacts([], [], [], [], None, "en") for item in self.context_sentences: @@ -93,7 +94,7 @@ def test_text_with_context_improves_score(self): assert(results_without_context[i].score < results_with_context[i].score) def test_context_custom_recognizer(self): - nlp_engine = SpacyNlpEngine() + nlp_engine = TESTS_NLP_ENGINE mock_nlp_artifacts = NlpArtifacts([], [], [], [], None, "en") # This test checks that a custom recognizer is also enhanced by context. diff --git a/presidio-analyzer/tests/test_credit_card_recognizer.py b/presidio-analyzer/tests/test_credit_card_recognizer.py index cbeee5584..7c8c562d8 100644 --- a/presidio-analyzer/tests/test_credit_card_recognizer.py +++ b/presidio-analyzer/tests/test_credit_card_recognizer.py @@ -2,7 +2,7 @@ # https://www.freeformatter.com/credit-card-number-generator-validator.html from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import CreditCardRecognizer from analyzer.entity_recognizer import EntityRecognizer @@ -142,7 +142,7 @@ def test_invalid_diners_card_with_no_context(self): results = credit_card_recognizer.analyze(number, entities) assert not results - + def test_invalid_diners_card_with_context(self): number = '36168002586008' results = credit_card_recognizer.analyze('my credit card number is ' + number, entities) diff --git a/presidio-analyzer/tests/test_crypto_recognizer.py b/presidio-analyzer/tests/test_crypto_recognizer.py index 1ba2322fd..7b4fd20dc 100644 --- a/presidio-analyzer/tests/test_crypto_recognizer.py +++ b/presidio-analyzer/tests/test_crypto_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import CryptoRecognizer from analyzer.entity_recognizer import EntityRecognizer diff --git a/presidio-analyzer/tests/test_domain_recognizer.py b/presidio-analyzer/tests/test_domain_recognizer.py index 2dc160d68..65926f1b3 100644 --- a/presidio-analyzer/tests/test_domain_recognizer.py +++ b/presidio-analyzer/tests/test_domain_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import DomainRecognizer from analyzer.entity_recognizer import EntityRecognizer @@ -34,7 +34,7 @@ def test_valid_domain(self): def test_valid_domains_lemma_text(self): domain1 = 'microsoft.com' - domain2 = 'google.co.il' + domain2 = 'google.co.il' results = domain_recognizer.analyze('my domains: {} {}'.format(domain1, domain2), entities) assert len(results) == 2 diff --git a/presidio-analyzer/tests/test_email_recognizer.py b/presidio-analyzer/tests/test_email_recognizer.py index a2ec7dc79..d6768dd5b 100644 --- a/presidio-analyzer/tests/test_email_recognizer.py +++ b/presidio-analyzer/tests/test_email_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import EmailRecognizer from analyzer.entity_recognizer import EntityRecognizer diff --git a/presidio-analyzer/tests/test_iban_recognizer.py b/presidio-analyzer/tests/test_iban_recognizer.py index 0f1bae7e7..4da0b0c21 100644 --- a/presidio-analyzer/tests/test_iban_recognizer.py +++ b/presidio-analyzer/tests/test_iban_recognizer.py @@ -1,7 +1,7 @@ from unittest import TestCase import string -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers.iban_recognizer import IbanRecognizer, IBAN_GENERIC_SCORE, LETTERS from analyzer.entity_recognizer import EntityRecognizer @@ -17,7 +17,7 @@ def update_iban_checksum(iban): iban_digits = (iban_no_spaces[4:] +iban_no_spaces[:2] + '00').upper().translate(LETTERS) check_digits = '{:0>2}'.format(98 - (int(iban_digits) % 97)) return iban[:2] + check_digits + iban[4:] - + class TestIbanRecognizer(TestCase): # Test valid and invalid ibans per each country which supports IBAN - without context @@ -35,26 +35,26 @@ def test_AL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_AL_iban_invalid_format_valid_checksum(self): iban = 'AL47 212A 1009 0000 0002 3569 8741' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AL_iban_invalid_length(self): iban = 'AL47 212A 1009 0000 0002 3569 874' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AL_iban_invalid_checksum(self): iban = 'AL47 2121 1009 0000 0002 3569 8740' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + #Andorra (8n, 12c) ADkk bbbs sssx cccc cccc cccc def test_AD_valid_iban_no_spaces(self): iban = 'AD1200012030200359100100' @@ -76,7 +76,7 @@ def test_AD_iban_invalid_format_valid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AD_iban_invalid_length(self): iban = 'AD12000A203020035910010' results = iban_recognizer.analyze(iban, entities) @@ -88,7 +88,7 @@ def test_AD_iban_invalid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + # Austria (16n) ATkk bbbb bccc cccc cccc def test_AT_iban_valid_no_spaces(self): iban = 'AT611904300234573201' @@ -103,26 +103,26 @@ def test_AT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_AT_iban_invalid_format_valid_checksum(self): iban = 'AT61 1904 A002 3457 3201' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AT_iban_invalid_length(self): iban = 'AT61 1904 3002 3457 320' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AT_iban_invalid_checksum(self): iban = 'AT61 1904 3002 3457 3202' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + # Azerbaijan    (4c,20n) AZkk bbbb cccc cccc cccc cccc cccc def test_AZ_iban_valid_no_spaces(self): iban = 'AZ21NABZ00000000137010001944' @@ -144,7 +144,7 @@ def test_AZ_iban_invalid_format_valid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_AZ_iban_invalid_length(self): iban = 'AZ21NABZ0000000013701000194' results = iban_recognizer.analyze(iban, entities) @@ -171,14 +171,14 @@ def testBH_iban_valid__with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_BH_iban_invalid_format_valid_checksum(self): iban = 'BH67BMA100001299123456' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BH_iban_invalid_length(self): iban = 'BH67BMAG0000129912345' results = iban_recognizer.analyze(iban, entities) @@ -190,7 +190,7 @@ def test_BH_iban_invalid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + # Belarus (4c, 4n, 16c)   BYkk bbbb aaaa cccc cccc cccc cccc   def test_BY_iban_valid_no_spaces(self): iban = 'BY13NBRB3600900000002Z00AB00' @@ -205,20 +205,20 @@ def test_BY_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_BY_iban_invalid_format_valid_checksum(self): iban = 'BY13NBRBA600900000002Z00AB00' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BY_iban_invalid_length(self): iban = 'BY13 NBRB 3600 9000 0000 2Z00 AB0' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BY_iban_invalid_checksum(self): iban = 'BY13NBRB3600900000002Z00AB01' results = iban_recognizer.analyze(iban, entities) @@ -239,14 +239,14 @@ def test_BE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 19, EntityRecognizer.MAX_SCORE) - + def test_BE_iban_invalid_format_valid_checksum(self): iban = 'BE71 A961 2345 6769' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BE_iban_invalid_length(self): iban = 'BE6853900754703' results = iban_recognizer.analyze(iban, entities) @@ -258,7 +258,7 @@ def test_BE_iban_invalid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + # Bosnia and Herzegovina    (16n)   BAkk bbbs sscc cccc ccxx def test_BA_iban_valid_no_spaces(self): iban = 'BA391290079401028494' @@ -280,7 +280,7 @@ def test_BA_iban_invalid_format_valid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BA_iban_invalid_length(self): iban = 'BA39129007940102849' results = iban_recognizer.analyze(iban, entities) @@ -307,20 +307,20 @@ def test_BR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 36, EntityRecognizer.MAX_SCORE) - + def test_BR_iban_invalid_format_valid_checksum(self): iban = 'BR97 0036 A305 0000 1000 9795 493P 1' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BR_iban_invalid_length(self): iban = 'BR9700360305000010009795493P' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BR_iban_invalid_checksum(self): iban = 'BR97 0036 0305 0000 1000 9795 493P 2' results = iban_recognizer.analyze(iban, entities) @@ -341,14 +341,14 @@ def test_BG_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_BG_iban_invalid_format_valid_checksum(self): iban = 'BG80 BNBG 9661 A020 3456 78' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_BG_iban_invalid_length(self): iban = 'BG80BNBG9661102034567' results = iban_recognizer.analyze(iban, entities) @@ -375,20 +375,20 @@ def test_CR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_CR_iban_invalid_format_valid_checksum(self): iban = 'CR05 0152 0200 1026 2840 6A' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_CR_iban_invalid_length(self): iban = 'CR05 0152 0200 1026 2840 6' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_CR_iban_invalid_checksum(self): iban = 'CR05 0152 0200 1026 2840 67' results = iban_recognizer.analyze(iban, entities) @@ -409,7 +409,7 @@ def test_HR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 26, EntityRecognizer.MAX_SCORE) - + def test_HR_iban_invalid_format_valid_checksum(self): iban = 'HR12 001 0051 8630 0016 A' iban = update_iban_checksum(iban) @@ -422,7 +422,7 @@ def test_HR_iban_invalid_length(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_HR_iban_invalid_Checksum(self): iban = 'HR12 1001 0051 8630 0016 1' results = iban_recognizer.analyze(iban, entities) @@ -443,14 +443,14 @@ def test_CY_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_CY_iban_invalid_format_valid_checksum(self): iban = 'CY17 0020 A128 0000 0012 0052 7600' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_CY_iban_invalid_length(self): iban = 'CY17 0020 0128 0000 0012 0052 760' results = iban_recognizer.analyze(iban, entities) @@ -477,14 +477,14 @@ def test_CZ_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_CZ_iban_invalid_format_valid_checksum(self): iban = 'CZ65 0800 A000 1920 0014 5399' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_CZ_iban_invalid_length(self): iban = 'CZ65 0800 0000 1920 0014 539' results = iban_recognizer.analyze(iban, entities) @@ -511,20 +511,20 @@ def test_DK_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 22, EntityRecognizer.MAX_SCORE) - + def test_DK_iban_invalid_format_valid_checksum(self): iban = 'DK50 0040 A440 1162 43' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_DK_iban_invalid_length(self): iban = 'DK50 0040 0440 1162 4' results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_DK_iban_invalid_checksum(self): iban = 'DK50 0040 0440 1162 44' results = iban_recognizer.analyze(iban, entities) @@ -545,14 +545,14 @@ def test_DO_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_DO_iban_invalid_format_valid_checksum(self): iban = 'DO28 BAGR A000 0001 2124 5361 1324' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_DO_iban_invalid_length(self): iban = 'DO28 BAGR 0000 0001 2124 5361 132' results = iban_recognizer.analyze(iban, entities) @@ -579,14 +579,14 @@ def test_TL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 28, EntityRecognizer.MAX_SCORE) - + def test_TL_iban_invalid_format_valid_checksum(self): iban = 'TL38 A080 0123 4567 8910 157' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_TL_iban_invalid_checksum(self): iban = 'TL38 0080 0123 4567 8910 158' results = iban_recognizer.analyze(iban, entities) @@ -607,14 +607,14 @@ def test_EE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_EE_iban_invalid_format_valid_checksum(self): iban = 'EE38 A200 2210 2014 5685' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_EE_iban_invalid_checksum(self): iban = 'EE38 2200 2210 2014 5686' results = iban_recognizer.analyze(iban, entities) @@ -635,14 +635,14 @@ def test_FO_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 22, EntityRecognizer.MAX_SCORE) - + def test_FO_iban_invalid_format_valid_checksum(self): iban = 'FO62 A460 0001 6316 34' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_FO_iban_invalid_checksum(self): iban = 'FO62 6460 0001 6316 35' results = iban_recognizer.analyze(iban, entities) @@ -670,7 +670,7 @@ def test_FI_iban_invalid_format_valid_checksum(self): results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_FI_iban_invalid_checksum(self): iban = 'FI21 1234 5600 0007 86' results = iban_recognizer.analyze(iban, entities) @@ -691,14 +691,14 @@ def test_FR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_FR_iban_invalid_format_valid_checksum(self): iban = 'FR14 A004 1010 0505 0001 3M02 606' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_FR_iban_invalid_checksum(self): iban = 'FR14 2004 1010 0505 0001 3M02 607' results = iban_recognizer.analyze(iban, entities) @@ -719,14 +719,14 @@ def test_GE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_GE_iban_invalid_format_valid_checksum(self): iban = 'GE29 NBA0 0000 0101 9049 17' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_GE_iban_invalid_checksum(self): iban = 'GE29 NB00 0000 0101 9049 18' results = iban_recognizer.analyze(iban, entities) @@ -747,14 +747,14 @@ def test_DE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_DE_iban_invalid_format_valid_checksum(self): iban = 'DE89 A704 0044 0532 0130 00' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_DE_iban_invalid_checksum(self): iban = 'DE89 3704 0044 0532 0130 01' results = iban_recognizer.analyze(iban, entities) @@ -775,7 +775,7 @@ def test_GI_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 28, EntityRecognizer.MAX_SCORE) - + def test_GI_iban_invalid_format_valid_checksum(self): iban = 'GI75 aWBK 0000 0000 7099 453' iban = update_iban_checksum(iban) @@ -805,14 +805,14 @@ def test_GR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_GR_iban_invalid_format_valid_checksum(self): iban = 'GR16 A110 1250 0000 0001 2300 695' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_GR_iban_invalid_checksum(self): iban = 'GR16 0110 1250 0000 0001 2300 696' results = iban_recognizer.analyze(iban, entities) @@ -833,14 +833,14 @@ def test_GL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 22, EntityRecognizer.MAX_SCORE) - + def test_GL_iban_invalid_format_valid_checksum(self): iban = 'GL89 A471 0001 0002 06' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_GL_iban_invalid_checksum(self): iban = 'GL89 6471 0001 0002 07' results = iban_recognizer.analyze(iban, entities) @@ -861,14 +861,14 @@ def test_GT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_GT_iban_invalid_format_valid_checksum(self): iban = 'GT82 TRAJ 0102 0000 0012 1002 9690 A' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_GT_iban_invalid_checksum(self): iban = 'GT82 TRAJ 0102 0000 0012 1002 9691' results = iban_recognizer.analyze(iban, entities) @@ -889,14 +889,14 @@ def test_HU_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_HU_iban_invalid_format_valid_checksum(self): iban = 'HU42 A177 3016 1111 1018 0000 0000' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_HU_iban_invalid_checksum(self): iban = 'HU42 1177 3016 1111 1018 0000 0001' results = iban_recognizer.analyze(iban, entities) @@ -917,14 +917,14 @@ def test_IS_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 32, EntityRecognizer.MAX_SCORE) - + def test_IS_iban_invalid_format_valid_checksum(self): iban = 'IS14 A159 2600 7654 5510 7303 39' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_IS_iban_invalid_checksum(self): iban = 'IS14 0159 2600 7654 5510 7303 30' results = iban_recognizer.analyze(iban, entities) @@ -945,14 +945,14 @@ def test_IE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_IE_iban_invalid_format_valid_checksum(self): iban = 'IE29 AIBK A311 5212 3456 78' iban = update_iban_checksum(iban) results = iban_recognizer.analyze(iban, entities) assert len(results) == 0 - + def test_IE_iban_invalid_checksum(self): iban = 'IE29 AIBK 9311 5212 3456 79' results = iban_recognizer.analyze(iban, entities) @@ -973,7 +973,7 @@ def test_IL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 28, EntityRecognizer.MAX_SCORE) - + def test_IL_iban_invalid_format_valid_checksum(self): iban = 'IL62 A108 0000 0009 9999 999' iban = update_iban_checksum(iban) @@ -1001,7 +1001,7 @@ def test_IT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_IT_iban_invalid_format_valid_checksum(self): iban = 'IT60 XW54 2811 1010 0000 0123 456' iban = update_iban_checksum(iban) @@ -1029,7 +1029,7 @@ def test_JO_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 37, EntityRecognizer.MAX_SCORE) - + def test_JO_iban_invalid_format_valid_checksum(self): iban = 'JO94 CBJO A010 0000 0000 0131 0003 02' iban = update_iban_checksum(iban) @@ -1057,7 +1057,7 @@ def test_KZ_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_KZ_iban_invalid_format_valid_checksum(self): iban = 'KZ86 A25K ZT50 0410 0100' iban = update_iban_checksum(iban) @@ -1085,7 +1085,7 @@ def test_XK_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_XK_iban_invalid_format_valid_checksum(self): iban = 'XK05 A212 0123 4567 8906' iban = update_iban_checksum(iban) @@ -1113,7 +1113,7 @@ def test_KW_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 37, EntityRecognizer.MAX_SCORE) - + def test_KW_iban_invalid_format_valid_checksum(self): iban = 'KW81 aBKU 0000 0000 0000 1234 5601 01' iban = update_iban_checksum(iban) @@ -1143,7 +1143,7 @@ def test_LV_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 26, EntityRecognizer.MAX_SCORE) - + def test_LV_iban_invalid_format_valid_checksum(self): iban = 'LV80 bANK 0000 4351 9500 1' iban = update_iban_checksum(iban) @@ -1172,7 +1172,7 @@ def test_LB_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_LB_iban_invalid_format_valid_checksum(self): iban = 'LB62 A999 0000 0001 0019 0122 9114' iban = update_iban_checksum(iban) @@ -1200,7 +1200,7 @@ def test_LI_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 26, EntityRecognizer.MAX_SCORE) - + def test_LI_iban_invalid_format_valid_checksum(self): iban = 'LI21 A881 0000 2324 013A A' iban = update_iban_checksum(iban) @@ -1228,7 +1228,7 @@ def test_LT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_LT_iban_invalid_format_valid_checksum(self): iban = 'LT12 A000 0111 0100 1000' iban = update_iban_checksum(iban) @@ -1256,7 +1256,7 @@ def test_LU_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 24, EntityRecognizer.MAX_SCORE) - + def test_LU_iban_invalid_format_valid_checksum(self): iban = 'LU28 A019 4006 4475 0000' iban = update_iban_checksum(iban) @@ -1284,7 +1284,7 @@ def test_MT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 38, EntityRecognizer.MAX_SCORE) - + def test_MT_iban_invalid_format_valid_checksum(self): iban = 'MT84 MALT A110 0001 2345 MTLC AST0 01S' iban = update_iban_checksum(iban) @@ -1312,7 +1312,7 @@ def test_MR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_MR_iban_invalid_format_valid_checksum(self): iban = 'MR13 A002 0001 0100 0012 3456 753' iban = update_iban_checksum(iban) @@ -1340,7 +1340,7 @@ def test_MU_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 37, EntityRecognizer.MAX_SCORE) - + def test_MU_iban_invalid_format_valid_checksum(self): iban = 'MU17 BOMM A101 1010 3030 0200 000M UR' iban = update_iban_checksum(iban) @@ -1368,7 +1368,7 @@ def test_MD_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_MD_iban_invalid_format_valid_checksum(self): iban = 'MD24 AG00 0225 1000 1310 4168 9' iban = update_iban_checksum(iban) @@ -1396,7 +1396,7 @@ def test_MC_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_MC_iban_invalid_format_valid_checksum(self): iban = 'MC58 A122 2000 0101 2345 6789 030' iban = update_iban_checksum(iban) @@ -1424,7 +1424,7 @@ def test_ME_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_ME_iban_invalid_format_valid_checksum(self): iban = 'ME25 A050 0001 2345 6789 51' iban = update_iban_checksum(iban) @@ -1452,7 +1452,7 @@ def test_NL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 22, EntityRecognizer.MAX_SCORE) - + def test_NL_iban_invalid_format_valid_checksum(self): iban = 'NL91 1BNA 0417 1643 00' iban = update_iban_checksum(iban) @@ -1480,7 +1480,7 @@ def test_MK_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 23, EntityRecognizer.MAX_SCORE) - + def test_MK_iban_invalid_format_valid_checksum(self): iban = 'MK07 A501 2000 0058 984' iban = update_iban_checksum(iban) @@ -1508,7 +1508,7 @@ def test_NO_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 18, EntityRecognizer.MAX_SCORE) - + def test_NO_iban_invalid_format_valid_checksum(self): iban = 'NO93 A601 1117 947' iban = update_iban_checksum(iban) @@ -1536,7 +1536,7 @@ def test_PK_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_PK_iban_invalid_format_valid_checksum(self): iban = 'PK36 SCBL A000 0011 2345 6702' iban = update_iban_checksum(iban) @@ -1564,7 +1564,7 @@ def test_PS_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 36, EntityRecognizer.MAX_SCORE) - + def test_PS_iban_invalid_format_valid_checksum(self): iban = 'PS92 PALS A000 0000 0400 1234 5670 2' iban = update_iban_checksum(iban) @@ -1592,7 +1592,7 @@ def test_PL_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 34, EntityRecognizer.MAX_SCORE) - + def test_PL_iban_invalid_format_valid_checksum(self): iban = 'PL61 A090 1014 0000 0712 1981 2874' iban = update_iban_checksum(iban) @@ -1620,7 +1620,7 @@ def test_PT_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 31, EntityRecognizer.MAX_SCORE) - + def test_PT_iban_invalid_format_valid_checksum(self): iban = 'PT50 A002 0123 1234 5678 9015 4' iban = update_iban_checksum(iban) @@ -1648,7 +1648,7 @@ def test_QA_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 36, EntityRecognizer.MAX_SCORE) - + def test_QA_iban_invalid_format_valid_checksum(self): iban = 'QA58 0OHB 0000 1234 5678 90AB CDEF G' iban = update_iban_checksum(iban) @@ -1678,7 +1678,7 @@ def test_RO_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_RO_iban_invalid_format_valid_checksum(self): iban = 'RO49 0AAA 1B31 0075 9384 0000' iban = update_iban_checksum(iban) @@ -1695,7 +1695,7 @@ def test_RO_iban_valid_checksum(self): ### Saint Barthelemy ### Saint Lucia ### Saint Martin - ### Saint Pierrer + ### Saint Pierrer # San Marino (1a,10n,12c)  SMkk xbbb bbss sssc cccc cccc ccc def test_SM_iban_valid_no_spaces(self): @@ -1711,7 +1711,7 @@ def test_SM_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 33, EntityRecognizer.MAX_SCORE) - + def test_SM_iban_invalid_format_valid_checksum(self): iban = 'SM86 0032 2509 8000 0000 0270 100' iban = update_iban_checksum(iban) @@ -1741,7 +1741,7 @@ def test_SA_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_SA_iban_invalid_format_valid_checksum(self): iban = 'SA03 A000 0000 6080 1016 7519' iban = update_iban_checksum(iban) @@ -1769,7 +1769,7 @@ def test_RS_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_RS_iban_invalid_format_valid_checksum(self): iban = 'RS35 A600 0560 1001 6113 79' iban = update_iban_checksum(iban) @@ -1797,7 +1797,7 @@ def test_RS_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_RS_iban_invalid_format_valid_checksum(self): iban = 'SK31 A200 0000 1987 4263 7541' iban = update_iban_checksum(iban) @@ -1825,7 +1825,7 @@ def test_SI_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 23, EntityRecognizer.MAX_SCORE) - + def test_SI_iban_invalid_format_valid_checksum(self): iban = 'SI56 A633 0001 2039 086' iban = update_iban_checksum(iban) @@ -1853,7 +1853,7 @@ def test_ES_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_ES_iban_invalid_format_valid_checksum(self): iban = 'ES91 A100 0418 4502 0005 1332' iban = update_iban_checksum(iban) @@ -1881,7 +1881,7 @@ def test_SE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_SE_iban_invalid_format_valid_checksum(self): iban = 'SE45 A000 0000 0583 9825 7466' iban = update_iban_checksum(iban) @@ -1909,7 +1909,7 @@ def test_CH_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 26, EntityRecognizer.MAX_SCORE) - + def test_CH_iban_invalid_format_valid_checksum(self): iban = 'CH93 A076 2011 6238 5295 7' iban = update_iban_checksum(iban) @@ -1937,7 +1937,7 @@ def test_TN_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_TN_iban_invalid_format_valid_checksum(self): iban = 'TN59 A000 6035 1835 9847 8831' iban = update_iban_checksum(iban) @@ -1965,7 +1965,7 @@ def test_TR_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 32, EntityRecognizer.MAX_SCORE) - + def test_TR_iban_invalid_format_valid_checksum(self): iban = 'TR33 A006 1005 1978 6457 8413 26' iban = update_iban_checksum(iban) @@ -1976,7 +1976,7 @@ def test_TR_iban_invalid_format_valid_checksum(self): def test_TR_iban_valid_checksum(self): iban = 'TR33 0006 1005 1978 6457 8413 27' results = iban_recognizer.analyze(iban, entities) - + assert len(results) == 0 # United Arab Emirates (3n,16n)  AEkk bbbc cccc cccc cccc ccc @@ -1993,7 +1993,7 @@ def test_AE_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 28, EntityRecognizer.MAX_SCORE) - + def test_AE_iban_invalid_format_valid_checksum(self): iban = 'AE07 A331 2345 6789 0123 456' iban = update_iban_checksum(iban) @@ -2021,7 +2021,7 @@ def test_GB_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_GB_iban_invalid_format_valid_checksum(self): iban = 'GB29 1WBK 6016 1331 9268 19' iban = update_iban_checksum(iban) @@ -2049,7 +2049,7 @@ def test_VA_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 27, EntityRecognizer.MAX_SCORE) - + def test_VA_iban_invalid_format_valid_checksum(self): iban = 'VA59 A011 2300 0012 3456 78' iban = update_iban_checksum(iban) @@ -2077,7 +2077,7 @@ def test_VG_iban_valid_with_spaces(self): assert len(results) == 1 assert_result(results[0], entities[0], 0, 29, EntityRecognizer.MAX_SCORE) - + def test_VG_iban_invalid_format_valid_checksum(self): iban = 'VG96 VPVG A000 0123 4567 8901' iban = update_iban_checksum(iban) @@ -2088,7 +2088,7 @@ def test_VG_iban_invalid_format_valid_checksum(self): def test_VG_iban_valid_checksum(self): iban = 'VG96 VPVG 0000 0123 4567 8902' results = iban_recognizer.analyze(iban, entities) - + assert len(results) == 0 # Test Invalid IBANs     diff --git a/presidio-analyzer/tests/test_ip_recognizer.py b/presidio-analyzer/tests/test_ip_recognizer.py index c28c0c5d3..d116e8040 100644 --- a/presidio-analyzer/tests/test_ip_recognizer.py +++ b/presidio-analyzer/tests/test_ip_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result_within_score_range +from tests import assert_result_within_score_range from analyzer.predefined_recognizers import IpRecognizer ip_recognizer = IpRecognizer() diff --git a/presidio-analyzer/tests/test_pattern_recognizer.py b/presidio-analyzer/tests/test_pattern_recognizer.py index 53421c835..e139db3a2 100644 --- a/presidio-analyzer/tests/test_pattern_recognizer.py +++ b/presidio-analyzer/tests/test_pattern_recognizer.py @@ -4,7 +4,7 @@ # https://www.datatrans.ch/showcase/test-cc-numbers # https://www.freeformatter.com/credit-card-number-generator-validator.html -from assertions import assert_result +from tests import assert_result from analyzer import Pattern from analyzer import PatternRecognizer diff --git a/presidio-analyzer/tests/test_recognizer_registry.py b/presidio-analyzer/tests/test_recognizer_registry.py index 578bdd1b5..8abcecfd5 100644 --- a/presidio-analyzer/tests/test_recognizer_registry.py +++ b/presidio-analyzer/tests/test_recognizer_registry.py @@ -1,14 +1,13 @@ +import hashlib +import logging from unittest import TestCase -import json -import hashlib import pytest -import logging + from analyzer import RecognizerRegistry, PatternRecognizer, \ EntityRecognizer, Pattern from analyzer.recognizer_registry.recognizers_store_api \ import RecognizerStoreApi # noqa: F401 -import time class RecognizerStoreApiMock(RecognizerStoreApi): diff --git a/presidio-analyzer/tests/test_spacy_recognizer.py b/presidio-analyzer/tests/test_spacy_recognizer.py index d4ef63444..8a288c35d 100644 --- a/presidio-analyzer/tests/test_spacy_recognizer.py +++ b/presidio-analyzer/tests/test_spacy_recognizer.py @@ -1,14 +1,12 @@ from unittest import TestCase -from assertions import assert_result, assert_result_within_score_range +from tests import assert_result, assert_result_within_score_range, TESTS_NLP_ENGINE -from analyzer.nlp_engine import SpacyNlpEngine from analyzer.predefined_recognizers import SpacyRecognizer from analyzer.entity_recognizer import EntityRecognizer -from analyzer.nlp_engine import NlpArtifacts NER_STRENGTH = 0.85 -nlp_engine = SpacyNlpEngine() +nlp_engine = TESTS_NLP_ENGINE spacy_recognizer = SpacyRecognizer() entities = ["PERSON", "DATE_TIME"] @@ -45,39 +43,13 @@ def test_person_full_name(self): def test_person_full_name_with_context(self): name = 'John Oliver' context = ' is the funniest comedian' - text = '{} {}'.format(name, context) + text = '{}{}'.format(name, context) results = self.prepare_and_analyze(nlp_engine, text) assert len(results) == 1 assert_result_within_score_range( results[0], entities[0], 0, 11, NER_STRENGTH, EntityRecognizer.MAX_SCORE) - def test_person_last_name(self): - text = 'Tailor' - results = self.prepare_and_analyze(nlp_engine, text) - - assert len(results) == 0 - - # Bug #617 : Spacy Recognizer doesn't recognize Mr. Tailor as PERSON even though online spacy visualizer indicates that it does - # See http://textanalysisonline.com/spacy-named-entity-recognition-ner - # def test_person_title_with_last_name(self): - # name = 'Mr. Tailor' - # results = spacy_recognizer.analyze(name, entities) - - # assert len(results) == 1 - # assert_result(results[0], entities[0], 0, 9, NER_STRENGTH) - - # Bug #617 : Spacy Recognizer doesn't recognize Mr. Tailor as PERSON even though online spacy visualizer indicates that it does - # See http://textanalysisonline.com/spacy-named-entity-recognition-ner - # def test_person_title_with_last_name_with_context_and_time(self): - # name = 'Mr. Tailor' - # context = 'Good morning' - # results = spacy_recognizer.analyze('{} {}'.format(context, name), entities) - - # assert len(results) == 2 - # assert_result_within_score_range(results[1], entities[1], 5, 12, NER_STRENGTH, EntityRecognizer.MAX_SCORE) - # assert_result_within_score_range(results[0], entities[0], 17, 23, NER_STRENGTH, EntityRecognizer.MAX_SCORE) - def test_person_full_middle_name(self): text = 'Richard Milhous Nixon' results = self.prepare_and_analyze(nlp_engine, text) @@ -93,19 +65,17 @@ def test_person_full_name_with_middle_letter(self): assert_result(results[0], entities[0], 0, 16, NER_STRENGTH) def test_person_full_name_complex(self): - text = 'Richard (Ric) C. Henderson' + text = 'Richard (Rick) C. Henderson' results = self.prepare_and_analyze(nlp_engine, text) - assert len(results) == 3 - # Richard - assert text[results[0].start:results[0].end] == "Richard" - assert_result(results[0], entities[0], 0, 7, NER_STRENGTH) - # Ric - assert text[results[1].start:results[1].end] == "Ric" - assert_result(results[1], entities[0], 9, 12, NER_STRENGTH) - # C. Henderson - assert text[results[2].start:results[2].end] == "C. Henderson" - assert_result(results[2], entities[0], 14, 26, NER_STRENGTH) + assert len(results) > 0 + + # check that most of the text is covered + covered_text = "" + for result in results: + covered_text+=text[result.start:result.end] + + assert len(text) - len(covered_text) < 5 def test_person_last_name_is_also_a_date_with_context_expected_person_only(self): name = 'Dan May' diff --git a/presidio-analyzer/tests/test_uk_nhs_recognizer.py b/presidio-analyzer/tests/test_uk_nhs_recognizer.py index fb87d393b..50d9d052f 100644 --- a/presidio-analyzer/tests/test_uk_nhs_recognizer.py +++ b/presidio-analyzer/tests/test_uk_nhs_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import NhsRecognizer from analyzer.entity_recognizer import EntityRecognizer diff --git a/presidio-analyzer/tests/test_us_bank_recognizer.py b/presidio-analyzer/tests/test_us_bank_recognizer.py index 08754253c..66b9f798e 100644 --- a/presidio-analyzer/tests/test_us_bank_recognizer.py +++ b/presidio-analyzer/tests/test_us_bank_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result +from tests import assert_result from analyzer.predefined_recognizers import UsBankRecognizer us_bank_recognizer = UsBankRecognizer() diff --git a/presidio-analyzer/tests/test_us_driver_license_recognizer.py b/presidio-analyzer/tests/test_us_driver_license_recognizer.py index 4f49c0b59..cc4887eb3 100644 --- a/presidio-analyzer/tests/test_us_driver_license_recognizer.py +++ b/presidio-analyzer/tests/test_us_driver_license_recognizer.py @@ -1,7 +1,7 @@ from unittest import TestCase -import os -from assertions import assert_result, assert_result_within_score_range + from analyzer.predefined_recognizers import UsLicenseRecognizer +from tests import assert_result_within_score_range us_license_recognizer = UsLicenseRecognizer() entities = ["US_DRIVER_LICENSE"] diff --git a/presidio-analyzer/tests/test_us_itin_recognizer.py b/presidio-analyzer/tests/test_us_itin_recognizer.py index 313b7936f..27497d15e 100644 --- a/presidio-analyzer/tests/test_us_itin_recognizer.py +++ b/presidio-analyzer/tests/test_us_itin_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result_within_score_range +from tests import assert_result_within_score_range from analyzer.predefined_recognizers import UsItinRecognizer us_itin_recognizer = UsItinRecognizer() diff --git a/presidio-analyzer/tests/test_us_passport_recognizer.py b/presidio-analyzer/tests/test_us_passport_recognizer.py index fda20e162..684cf0fab 100644 --- a/presidio-analyzer/tests/test_us_passport_recognizer.py +++ b/presidio-analyzer/tests/test_us_passport_recognizer.py @@ -1,6 +1,6 @@ from unittest import TestCase -from assertions import assert_result_within_score_range +from tests import assert_result_within_score_range from analyzer.predefined_recognizers import UsPassportRecognizer us_passport_recognizer = UsPassportRecognizer() diff --git a/presidio-analyzer/tests/test_us_phone_recognizer.py b/presidio-analyzer/tests/test_us_phone_recognizer.py index cae6114cd..4250f415d 100644 --- a/presidio-analyzer/tests/test_us_phone_recognizer.py +++ b/presidio-analyzer/tests/test_us_phone_recognizer.py @@ -1,8 +1,8 @@ from unittest import TestCase -from assertions import assert_result_within_score_range from analyzer.predefined_recognizers import UsPhoneRecognizer from analyzer.entity_recognizer import EntityRecognizer +from tests import assert_result_within_score_range phone_recognizer = UsPhoneRecognizer() entities = ["PHONE_NUMBER"] diff --git a/presidio-analyzer/tests/test_us_ssn_recognizer.py b/presidio-analyzer/tests/test_us_ssn_recognizer.py index a0bf8740a..64bb76451 100644 --- a/presidio-analyzer/tests/test_us_ssn_recognizer.py +++ b/presidio-analyzer/tests/test_us_ssn_recognizer.py @@ -1,51 +1,50 @@ -from unittest import TestCase - -from assertions import assert_result, assert_result_within_score_range +from tests import assert_result_within_score_range from analyzer.predefined_recognizers import UsSsnRecognizer us_ssn_recognizer = UsSsnRecognizer() entities = ["US_SSN"] -class TestUsSsnRecognizer(TestCase): +def test_valid_us_ssn_very_weak_match(): + num1 = '078-051120' + num2 = '07805-1120' + results = us_ssn_recognizer.analyze( + '{} {}'.format(num1, num2), entities) + + assert len(results) == 2 + + assert results[0].score != 0 + assert_result_within_score_range( + results[0], entities[0], 0, 10, 0, 0.3) - def test_valid_us_ssn_very_weak_match(self): - num1 = '078-051120' - num2 = '07805-1120' - results = us_ssn_recognizer.analyze( - '{} {}'.format(num1, num2), entities) + assert results[0].score != 0 + assert_result_within_score_range( + results[1], entities[0], 11, 21, 0, 0.3) - assert len(results) == 2 - assert results[0].score != 0 - assert_result_within_score_range( - results[0], entities[0], 0, 10, 0, 0.3) +def test_valid_us_ssn_weak_match(): + num = '078051120' + results = us_ssn_recognizer.analyze(num, entities) - assert results[0].score != 0 - assert_result_within_score_range( - results[1], entities[0], 11, 21, 0, 0.3) + assert len(results) == 1 + assert results[0].score != 0 + assert_result_within_score_range( + results[0], entities[0], 0, 9, 0.3, 0.4) - def test_valid_us_ssn_weak_match(self): - num = '078051120' - results = us_ssn_recognizer.analyze(num, entities) - assert len(results) == 1 - assert results[0].score != 0 - assert_result_within_score_range( - results[0], entities[0], 0, 9, 0.3, 0.4) +def test_valid_us_ssn_medium_match(): + num = '078-05-1120' + results = us_ssn_recognizer.analyze(num, entities) - def test_valid_us_ssn_medium_match(self): - num = '078-05-1120' - results = us_ssn_recognizer.analyze(num, entities) + assert len(results) == 1 + assert results[0].score != 0 + assert_result_within_score_range( + results[0], entities[0], 0, 11, 0.5, 0.6) + assert 0.49 < results[0].score < 0.6 - assert len(results) == 1 - assert results[0].score != 0 - assert_result_within_score_range( - results[0], entities[0], 0, 11, 0.5, 0.6) - assert 0.49 < results[0].score < 0.6 - def test_invalid_us_ssn(self): - num = '078-05-11201' - results = us_ssn_recognizer.analyze(num, entities) +def test_invalid_us_ssn(): + num = '078-05-11201' + results = us_ssn_recognizer.analyze(num, entities) - assert len(results) == 0 + assert len(results) == 0 diff --git a/pytest.ini b/pytest.ini index 402b8dc6c..5ed1d4f99 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,4 +2,7 @@ testpaths = presidio-analyzer # Commented out to avoid performance tests failures. Uncoment when debugging tests. #log_cli = true -#log_level = DEBUG \ No newline at end of file +#log_level = INFO + +filterwarnings = + ignore::DeprecationWarning \ No newline at end of file