diff --git a/Dockerfile.golang.base b/Dockerfile.golang.base index c4fcf9860..e23acccd4 100644 --- a/Dockerfile.golang.base +++ b/Dockerfile.golang.base @@ -5,5 +5,5 @@ FROM ${REGISTRY}/presidio-golang-deps WORKDIR $GOPATH/src/github.com/Microsoft/presidio ADD . $GOPATH/src/github.com/Microsoft/presidio -RUN dep ensure && \ - make go-test +RUN dep ensure +RUN make go-test diff --git a/Dockerfile.golang.deps b/Dockerfile.golang.deps index 098e8fbbc..ef604afdd 100644 --- a/Dockerfile.golang.deps +++ b/Dockerfile.golang.deps @@ -1,9 +1,10 @@ FROM golang:1.11.3-alpine3.8 ARG DEP_VERSION="0.5.0" - -RUN apk --update add curl git make g++ + +RUN apk --update add curl git make g++ tesseract-ocr-dev RUN curl -L -s https://github.com/golang/dep/releases/download/v${DEP_VERSION}/dep-linux-amd64 -o $GOPATH/bin/dep && \ chmod +x $GOPATH/bin/dep && \ curl -L https://git.io/vp6lP | sh + diff --git a/Gopkg.lock b/Gopkg.lock index 9ed5687c5..41b05feb3 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -39,7 +39,7 @@ version = "v1.1.3" [[projects]] - digest = "1:5fadaa95510a9c66967dea2ce0865f420f296af6d68e8285bc08875e9146da17" + digest = "1:ced435a7600ec61f81056fe3b8f6f51e773ef3cd9caf6f14564b3d81951e803d" name = "github.com/Azure/azure-event-hubs-go" packages = [ ".", @@ -48,8 +48,8 @@ "storage", ] pruneopts = "UT" - revision = "5bee80eb97437a2daa40734b9b13cbc2c9d545e8" - version = "v1.1.0" + revision = "804d4a4235136f100951fde308046bedabe2d7c6" + version = "v1.1.1" [[projects]] digest = "1:d2ccb697dc13c8fbffafa37baae97594d5592ae8f7e113471084137315536e2b" @@ -81,7 +81,7 @@ version = "0.3.0" [[projects]] - digest = "1:ef17fa8a0edc01cb33eefed09d6865064ebdcc74ceef1637693bc466c708deac" + digest = "1:8824d5e809d68fd509eef8d4e78e5e4472bd42d173ab6c4941ceb0d5c6b550aa" name = "github.com/Azure/go-autorest" packages = [ "autorest", @@ -92,26 +92,27 @@ "autorest/validation", "logger", "tracing", + "version", ] pruneopts = "UT" revision = "f401b1ccc8eb505927fae7a0c7f6406d37ca1c7e" version = "v11.2.8" 
[[projects]] - digest = "1:c99bd4548f502371b98c77534239a514c9a1e715d468af3c108db06186aa692a" + digest = "1:ed77032e4241e3b8329c9304d66452ed196e795876e14be677a546f36b94e67a" name = "github.com/DataDog/zstd" packages = ["."] pruneopts = "UT" - revision = "aebefd9fcb99f22cd691ef778a12ed68f0e6a1ab" - version = "v1.3.4" + revision = "c7161f8c63c045cbc7ca051dcc969dd0e4054de2" + version = "v1.3.5" [[projects]] branch = "master" - digest = "1:badf98fd26aa74cf9cab4851149476b38e6610e7291b89b8fef294d461db9909" + digest = "1:aadb1ac57ed6201de4f898495fd0791c0ed6c8e8d652d8b8b49f4d1f1c6d2eae" name = "github.com/Microsoft/presidio-genproto" packages = ["golang"] pruneopts = "UT" - revision = "9316a054b1e4b300d3bddf159864faaaca908073" + revision = "03f9193b772ccd3b4bfe3ad4cdc6155748fcdb8d" [[projects]] digest = "1:a59a467c541a1bf8b06e4fad6113028c959be6573b78ceca9f8020cd0d2127fc" @@ -122,7 +123,7 @@ version = "v1.20.0" [[projects]] - digest = "1:a94220f2af28002d7a37af089ebb217d352226a70af7ab082fa81c45bb8a512a" + digest = "1:35564f1cd08e1163bf327e6f595b9e596c4470a77f7f13fe6421236ea99e5966" name = "github.com/aws/aws-sdk-go" packages = [ "aws", @@ -161,8 +162,8 @@ "service/sts", ] pruneopts = "UT" - revision = "5f1ca23f3ded773a9ba214e6fac36acd9a965a53" - version = "v1.16.6" + revision = "3991042237b45cf58c9d5f34295942d5533c28c6" + version = "v1.16.11" [[projects]] digest = "1:526d64d0a3ac6c24875724a9355895be56a21f89a5d3ab5ba88d91244269a7d8" @@ -220,6 +221,14 @@ revision = "06ea1031745cb8b3dab3f6a236daf2b0aa468b7e" version = "v3.2.0" +[[projects]] + branch = "master" + digest = "1:7d0b66300f67891562442ba782b7927859bf9274afd36c4651371262396bbb65" + name = "github.com/disintegration/imaging" + packages = ["."] + pruneopts = "UT" + revision = "9458da53d1e65e098d48467a4317c403327e4424" + [[projects]] digest = "1:1f0c7ab489b407a7f8f9ad16c25a504d28ab461517a971d341388a56156c1bd7" name = "github.com/eapache/go-resiliency" @@ -278,11 +287,11 @@ [[projects]] branch = "master" - digest = 
"1:8c86b3a4f631171d60523cdaeba992e0ccc80b9923a9e9cb605e5a216df588c0" + digest = "1:264ce7a5d411d8d4304965d87ba016e379ed2d6bce26e62340112c108a786f38" name = "github.com/gin-contrib/zap" packages = ["."] pruneopts = "UT" - revision = "a4f331736217f34737505ce743f9d41efb41e0ab" + revision = "0672bb1dbf3af725a3d294a73bd92dab67cb8adc" [[projects]] digest = "1:d5083934eb25e45d17f72ffa86cae3814f4a9d6c073c4f16b64147169b245606" @@ -298,7 +307,7 @@ version = "v1.3.0" [[projects]] - digest = "1:34a9a60fade37f8009ed4a19e02924198aba3eabfcc120ee5c6002b7de17212d" + digest = "1:424f6593024cdf0f6f90cba81bc69ca98df3758525e6fb248198ef15ead603a9" name = "github.com/go-redis/redis" packages = [ ".", @@ -311,16 +320,16 @@ "internal/util", ] pruneopts = "UT" - revision = "b3d9bf10f6666b2ee5100a6f3f84f4caf3b4e37d" - version = "v6.14.2" + revision = "7f89fbac80bcc62ce920b6dbc6ca60238d7725d1" + version = "v6.15.0" [[projects]] branch = "master" - digest = "1:b57b31665db80eb5a00c672458ab53b1bec0bea386a247368f59ec9b522af120" + digest = "1:23dca8a35ce10bf5caf35e92f6a2b5e633f55cbd28259c2207eb31d5b44b0c02" name = "github.com/go-sql-driver/mysql" packages = ["."] pruneopts = "UT" - revision = "60d456a402782453be397030407e34decaf04d73" + revision = "c45f530f8e7fe40f4687eaa50d0c8c5f1b66f9e0" [[projects]] digest = "1:436e8c1845d92384995e9c93470f639b886dbbc4b49c7babf544f9cc06361198" @@ -340,11 +349,11 @@ [[projects]] branch = "master" - digest = "1:8368ed15381fa230c3a91ccfcc4e7300fc4fcd19f91266832365846383aee80d" + digest = "1:07af58eca86d0e46804aa8eda58b6cf9ecb3110783d2fec55a32cd64639198ea" name = "github.com/go-xorm/xorm" packages = ["."] pruneopts = "UT" - revision = "401f4ee8ff8cbc40a4754cb12192fbe4f02f3979" + revision = "a8f0a7110a8049c79069836b420538f1964e6339" [[projects]] digest = "1:b402bb9a24d108a9405a6f34675091b036c8b056aac843bf6ef2389a65c5cf48" @@ -366,7 +375,8 @@ revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998" [[projects]] - digest = 
"1:588beb9f80d2b0afddf05663b32d01c867da419458b560471d81cca0286e76b8" + branch = "master" + digest = "1:97239b8255df64c18138842365b135975e7402112beb593e139de1b91303d5bc" name = "github.com/golang/protobuf" packages = [ "proto", @@ -378,8 +388,7 @@ "ptypes/wrappers", ] pruneopts = "UT" - revision = "aa810b61a9c79d51363740d207bb46cf8e620ed5" - version = "v1.2.0" + revision = "1d3f30b51784bec5aad268e59fd3c2fc1c2fe73f" [[projects]] branch = "master" @@ -565,6 +574,14 @@ revision = "4b7aa43c6742a2c18fdef89dd197aaae7dac7ccd" version = "1.0.1" +[[projects]] + digest = "1:6411dc2c8891eb05c1d0599abf571e81f72cbc97ebd223b44d45712f4f1799c2" + name = "github.com/otiai10/gosseract" + packages = ["."] + pruneopts = "UT" + revision = "b026a6fd291f00736db60738f4e83a79d26359cb" + version = "v2.2.0" + [[projects]] digest = "1:95741de3af260a92cc5c7f3f3061e85273f5a81b5db20d4bd68da74bd521675e" name = "github.com/pelletier/go-toml" @@ -791,6 +808,18 @@ pruneopts = "UT" revision = "505ab145d0a99da450461ae2c1a9f6cd10d1f447" +[[projects]] + branch = "master" + digest = "1:acadbbf02e5c744709c60ac929e63258a69a3404ab36ed0c1a245cc28f056220" + name = "golang.org/x/image" + packages = [ + "bmp", + "tiff", + "tiff/lzw", + ] + pruneopts = "UT" + revision = "cd38e8056d9b27bb2f265effa37fb0ea6b8a7f0f" + [[projects]] branch = "master" digest = "1:89a0cb976397aa9157a45bb2b896d0bcd07ee095ac975e0f03c53250c402265e" @@ -805,29 +834,29 @@ "trace", ] pruneopts = "UT" - revision = "e147a9138326bc0e9d4e179541ffd8af41cff8a9" + revision = "927f97764cc334a6575f4b7a1584a147864d5723" [[projects]] branch = "master" - digest = "1:d6b0cfc5ae30841c4b116ac589629f56f8add0955a39f11d8c0d06ca67f5b3d5" + digest = "1:04a5b0e4138f98eef79ce12a955a420ee358e9f787044cc3a553ac3c3ade997e" name = "golang.org/x/sync" packages = [ "errgroup", "semaphore", ] pruneopts = "UT" - revision = "42b317875d0fa942474b76e1b46a6060d720ae6e" + revision = "37e7f081c4d4c64e13b10787722085407fe5d15f" [[projects]] branch = "master" - digest = 
"1:ba8cbf57cfd92d5f8592b4aca1a35d92c162363d32aeabd5b12555f8896635e7" + digest = "1:10405139b45e3a97a3842c93984710e30466eb933545f219ad3f5e45246973b4" name = "golang.org/x/sys" packages = [ "unix", "windows", ] pruneopts = "UT" - revision = "4d1cda033e0619309c606fc686de3adcf599539e" + revision = "9a3f9b0469bbc6b8802087ae5c0af9f61502de01" [[projects]] digest = "1:a2ab62866c75542dd18d2b069fec854577a20211d7c0ea6ae746072a1dccdd18" @@ -866,15 +895,15 @@ name = "google.golang.org/api" packages = ["support/bundler"] pruneopts = "UT" - revision = "41dc4b66e69d5dbf20efe4ba67e19d214d147ae3" + revision = "f26a60c56f148a32e87f3f4591c8ebf834b5561f" [[projects]] digest = "1:c25289f43ac4a68d88b02245742347c94f1e108c534dda442188015ff80669b3" name = "google.golang.org/appengine" packages = ["cloudsql"] pruneopts = "UT" - revision = "4a4468ece617fc8205e99368fa2200e9d1fad421" - version = "v1.3.0" + revision = "e9657d882bb81064595ca3b56cbe2546bbabf7b1" + version = "v1.4.0" [[projects]] branch = "master" @@ -882,7 +911,7 @@ name = "google.golang.org/genproto" packages = ["googleapis/rpc/status"] pruneopts = "UT" - revision = "bd91e49a0898e27abb88c339b432fa53d7497ac0" + revision = "bd9b4fb69e2ffd37621a6caa54dcbead29b546f2" [[projects]] digest = "1:8c8ed249fa6a8db070bf2082f02052c697695fa5e1558b4e28dd0fb5f15f70a2" @@ -1163,6 +1192,7 @@ "github.com/bsm/sarama-cluster", "github.com/capitalone/fpe/ff1", "github.com/denisenkom/go-mssqldb", + "github.com/disintegration/imaging", "github.com/gin-contrib/cors", "github.com/gin-contrib/zap", "github.com/gin-gonic/gin", @@ -1173,10 +1203,12 @@ "github.com/korovkin/limiter", "github.com/lib/pq", "github.com/mattn/go-sqlite3", + "github.com/otiai10/gosseract", "github.com/presid-io/stow", "github.com/presid-io/stow/azure", "github.com/presid-io/stow/s3", "github.com/satori/go.uuid", + "github.com/spf13/pflag", "github.com/spf13/viper", "github.com/streadway/amqp", "github.com/stretchr/testify/assert", diff --git a/Gopkg.toml b/Gopkg.toml index 
1b262acdd..e36d20538 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -1,3 +1,7 @@ +[[override]] + name = "github.com/golang/protobuf" + branch = "master" + [[constraint]] name = "github.com/Azure/azure-event-hubs-go" version = "1.1.0" @@ -50,6 +54,10 @@ branch = "master" name = "github.com/presid-io/stow" +[[constraint]] + name = "github.com/disintegration/imaging" + branch = "master" + [[constraint]] name = "github.com/stretchr/testify" version = "1.2.1" @@ -73,10 +81,10 @@ [[constraint]] name = "github.com/lib/pq" branch = "master" - + [[constraint]] name = "google.golang.org/grpc" - version = "1.14.0" + version = "1.17.0" [[constraint]] name = "github.com/grpc-ecosystem/go-grpc-middleware" diff --git a/Makefile b/Makefile index b853a336b..5db4b021e 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ DOCKER_REGISTRY ?= presidio.azurecr.io DOCKER_BUILD_FLAGS := LDFLAGS := -BINS = presidio-anonymizer presidio-api presidio-scheduler presidio-datasink presidio-collector -IMAGES = presidio-analyzer presidio-anonymizer presidio-api presidio-scheduler presidio-datasink presidio-collector +BINS = presidio-anonymizer presidio-ocr presidio-anonymizer-image presidio-api presidio-scheduler presidio-datasink presidio-collector +IMAGES = presidio-anonymizer presidio-ocr presidio-anonymizer-image presidio-api presidio-scheduler presidio-datasink presidio-collector presidio-analyzer GOLANG_DEPS = presidio-golang-deps PYTHON_DEPS = presidio-python-deps GOLANG_BASE = presidio-golang-base @@ -41,9 +41,9 @@ docker-build-base: # DOCKER_REGISTRY to your own personal registry if you are not pushing to the official upstream. .PHONY: docker-build docker-build: docker-build-base -docker-build: $(addsuffix -image,$(IMAGES)) +docker-build: $(addsuffix -dimage,$(IMAGES)) -%-image: +%-dimage: docker build $(DOCKER_BUILD_FLAGS) --build-arg REGISTRY=$(DOCKER_REGISTRY) --build-arg VERSION=$(VERSION) -t $(DOCKER_REGISTRY)/$*:$(PRESIDIO_LABEL) -f $*/Dockerfile . 
# You must be logged into DOCKER_REGISTRY before you can push. @@ -91,6 +91,9 @@ test-functional: docker-build -docker rm test-presidio-api -f -docker rm test-presidio-analyzer -f -docker rm test-presidio-anonymizer -f + -docker rm test-presidio-anonymizer-image -f + -docker rm test-presidio-ocr -f + -docker network create testnetwork docker run --rm --name test-azure-emulator --network testnetwork -e executable=blob -d -t -p 10000:10000 -p 10001:10001 -v ${HOME}/emulator:/opt/azurite/folder arafato/azurite docker run --rm --name test-kafka -d -p 2181:2181 -p 9092:9092 --env ADVERTISED_HOST=127.0.0.1 --env ADVERTISED_PORT=9092 spotify/kafka @@ -98,12 +101,16 @@ test-functional: docker-build docker run --rm --name test-s3-emulator --network testnetwork -d -p 9090:9090 -p 9191:9191 -t adobe/s3mock docker run --rm --name test-presidio-analyzer --network testnetwork -d -p 3000:3000 -e GRPC_PORT=3000 $(DOCKER_REGISTRY)/presidio-analyzer:$(PRESIDIO_LABEL) docker run --rm --name test-presidio-anonymizer --network testnetwork -d -p 3001:3001 -e GRPC_PORT=3001 $(DOCKER_REGISTRY)/presidio-anonymizer:$(PRESIDIO_LABEL) + docker run --rm --name test-presidio-anonymizer-image --network testnetwork -d -p 3002:3002 -e GRPC_PORT=3002 $(DOCKER_REGISTRY)/presidio-anonymizer-image:$(PRESIDIO_LABEL) + docker run --rm --name test-presidio-ocr --network testnetwork -d -p 3003:3003 -e GRPC_PORT=3003 $(DOCKER_REGISTRY)/presidio-ocr:$(PRESIDIO_LABEL) sleep 30 - docker run --rm --name test-presidio-api --network testnetwork -d -p 8080:8080 -e WEB_PORT=8080 -e ANALYZER_SVC_ADDRESS=test-presidio-analyzer:3000 -e ANONYMIZER_SVC_ADDRESS=test-presidio-anonymizer:3001 $(DOCKER_REGISTRY)/presidio-api:$(PRESIDIO_LABEL) + docker run --rm --name test-presidio-api --network testnetwork -d -p 8080:8080 -e WEB_PORT=8080 -e ANALYZER_SVC_ADDRESS=test-presidio-analyzer:3000 -e ANONYMIZER_SVC_ADDRESS=test-presidio-anonymizer:3001 -e ANONYMIZER_IMAGE_SVC_ADDRESS=test-presidio-anonymizer-image:3002 -e 
OCR_SVC_ADDRESS=test-presidio-ocr:3003 $(DOCKER_REGISTRY)/presidio-api:$(PRESIDIO_LABEL) go test --tags functional ./tests -count=1 docker rm test-presidio-api -f docker rm test-presidio-analyzer -f docker rm test-presidio-anonymizer -f + docker rm test-presidio-anonymizer-image -f + docker rm test-presidio-ocr -f docker rm test-azure-emulator -f docker rm test-kafka -f docker rm test-redis -f diff --git a/README.MD b/README.MD index 7cbe27ffc..35eb8a89f 100644 --- a/README.MD +++ b/README.MD @@ -5,9 +5,9 @@ --- -# Presidio - Data Loss Prevention API +# Presidio - Data Protection API -**Context aware, born to the cloud, customizable data loss prevention service** +**Context aware, pluggable and customizable data protection and PII anonymization service for text and images** ## Description @@ -20,6 +20,14 @@ You can find a more detailed list [here](https://microsoft.github.io/presidio/fi ## Features +***Free text anonymization*** + +[![Image1](https://user-images.githubusercontent.com/17064840/50557166-2048ca80-0ceb-11e9-9153-d39a3f507d32.png)](https://user-images.githubusercontent.com/17064840/50557166-2048ca80-0ceb-11e9-9153-d39a3f507d32.png) + +***Text anonymization in images*** + +[![Image2](https://user-images.githubusercontent.com/17064840/50557215-bc72d180-0ceb-11e9-8c92-4fbc01bbcb2a.png)](https://user-images.githubusercontent.com/17064840/50557215-bc72d180-0ceb-11e9-8c92-4fbc01bbcb2a.png) + * Text analytics - Predefined analyzers with customizable fields. * Probability scores - Customize the sensitive text detection threshold. * Anonymization - Anonymize sensitive text and images @@ -65,7 +73,7 @@ The [design document](https://microsoft.github.io/presidio/design.html) introduc 1. Analyze text ```sh - echo -n '{"text":"John Smith lives in New York. We met yesterday morning in Seattle. I called him before on (212) 555-1234 to verify the appointment. 
He also told me that his drivers license is AC333991", "analyzeTemplate":{"fields":[]} }' | http /api/v1/projects//analyze + $ echo -n '{"text":"John Smith lives in New York. We met yesterday morning in Seattle. I called him before on (212) 555-1234 to verify the appointment. He also told me that his drivers license is AC333991", "analyzeTemplate":{"fields":[]} }' | http /api/v1/projects//analyze ``` ***Sample 2*** @@ -74,36 +82,48 @@ You can also create reusable templates 1. Create an analyzer project ```sh - echo -n '{"fields":[]}' | http /api/v1/templates//analyze/ + $ echo -n '{"fields":[]}' | http /api/v1/templates//analyze/ ``` 2. Analyze text ```sh - echo -n '{"text":"my credit card number is 2970-84746760-9907 345954225667833 4961-2765-5327-5913", "AnalyzeTemplateId":"" }' | http /api/v1/projects//analyze + $ echo -n '{"text":"my credit card number is 2970-84746760-9907 345954225667833 4961-2765-5327-5913", "AnalyzeTemplateId":"" }' | http /api/v1/projects//analyze ``` ***Sample 3*** 1. Create an analyzer project ```sh - echo -n '{"fields":[{"name":"PHONE_NUMBER"}, {"name":"LOCATION"}, {"name":"DATE_TIME"}]}' | http /api/v1/templates//analyze/ + $ echo -n '{"fields":[{"name":"PHONE_NUMBER"}, {"name":"LOCATION"}, {"name":"DATE_TIME"}]}' | http /api/v1/templates//analyze/ ``` 2. Analyze text ```sh - echo -n '{"text":"We met yesterday morning in Seattle and his phone number is (212) 555 1234", "AnalyzeTemplateId":"" }' | http /api/v1/projects//analyze + $ echo -n '{"text":"We met yesterday morning in Seattle and his phone number is (212) 555 1234", "AnalyzeTemplateId":"" }' | http /api/v1/projects//analyze ``` ***Sample 4*** 1. 
Create an anonymizer template (This template replaces values in PHONE_NUMBER and redacts CREDIT_CARD) ```sh - echo -n '{"fieldTypeTransformations":[{"fields":[{"name":"PHONE_NUMBER"}],"transformation":{"replaceValue":{"newValue":"\u003cphone-number\u003e"}}},{"fields":[{"name":"CREDIT_CARD"}],"transformation":{"redactValue":{}}}]}' | http /api/v1/templates//anonymize/ + $ echo -n '{"fieldTypeTransformations":[{"fields":[{"name":"PHONE_NUMBER"}],"transformation":{"replaceValue":{"newValue":"\u003cphone-number\u003e"}}},{"fields":[{"name":"CREDIT_CARD"}],"transformation":{"redactValue":{}}}]}' | http /api/v1/templates//anonymize/ ``` 2. Anonymize text ```sh - echo -n '{"text":"my phone number is 057-555-2323 and my credit card is 4961-2765-5327-5913", "AnalyzeTemplateId":"", "AnonymizeTemplateId":"" }' | http /api/v1/projects//anonymize + $ echo -n '{"text":"my phone number is 057-555-2323 and my credit card is 4961-2765-5327-5913", "AnalyzeTemplateId":"", "AnonymizeTemplateId":"" }' | http /api/v1/projects//anonymize + ``` + +***Sample 5 (Image anonymization)*** + +1. Create an anonymizer image template (This template redacts values with black color) + ```sh + $ echo -n '{"fieldTypeGraphics":[{"graphic":{"fillColorValue":{"blue":0,"red":0,"green":0}}}]}' | http /api/v1/templates//anonymize-image/ + ``` + +2. Anonymize image + ```sh + $ http -f POST /api/v1/projects//anonymize-image detectionType='OCR' analyzeTemplateId='' anonymizeImageTemplateId='' imageType='image/png' file@~/test-ocr.png > test-output.png ``` ### Current Features Status diff --git a/charts/presidio/templates/_helpers.tpl b/charts/presidio/templates/_helpers.tpl index e1053bf42..717bf5175 100644 --- a/charts/presidio/templates/_helpers.tpl +++ b/charts/presidio/templates/_helpers.tpl @@ -21,6 +21,12 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this {{- define "presidio.anonymizer.fullname" -}} {{ include "presidio.fullname" . 
| printf "%s-anonymizer" }} {{- end -}} +{{- define "presidio.anonymizerimage.fullname" -}} +{{ include "presidio.fullname" . | printf "%s-anonymizer-image" }} +{{- end -}} +{{- define "presidio.ocr.fullname" -}} +{{ include "presidio.fullname" . | printf "%s-ocr" }} +{{- end -}} {{- define "presidio.api.fullname" -}} {{ include "presidio.fullname" . | printf "%s-api" }} {{- end -}} @@ -36,6 +42,14 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this {{template "presidio.anonymizer.fullname" .}}:{{.Values.anonymizer.service.externalPort}} {{- end -}} +{{- define "presidio.anonymizerimage.address" -}} +{{template "presidio.anonymizerimage.fullname" .}}:{{.Values.anonymizerimage.service.externalPort}} +{{- end -}} + +{{- define "presidio.ocr.address" -}} +{{template "presidio.ocr.fullname" .}}:{{.Values.ocr.service.externalPort}} +{{- end -}} + {{- define "presidio.scheduler.address" -}} {{template "presidio.scheduler.fullname" .}}:{{.Values.scheduler.service.externalPort}} {{- end -}} diff --git a/charts/presidio/templates/anonymizer-image-deployment.yaml b/charts/presidio/templates/anonymizer-image-deployment.yaml new file mode 100644 index 000000000..0d6f48a6b --- /dev/null +++ b/charts/presidio/templates/anonymizer-image-deployment.yaml @@ -0,0 +1,35 @@ +{{ $fullname := include "presidio.anonymizerimage.fullname" . 
}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $fullname }} + labels: + app: {{ $fullname }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + replicas: 1 + selector: + matchLabels: + app: {{ $fullname }} + template: + metadata: + labels: + app: {{ $fullname }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.registry }}/{{ .Values.anonymizerimage.name }}:{{ default .Chart.AppVersion .Values.anonymizerimage.tag }}" + imagePullPolicy: {{ default "IfNotPresent" .Values.anonymizerimage.imagePullPolicy }} + ports: + - containerPort: {{ .Values.anonymizerimage.service.internalPort }} + env: + - name: PRESIDIO_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: GRPC_PORT + value: {{ .Values.anonymizerimage.service.internalPort | quote }} + {{ if .Values.privateRegistry }}imagePullSecrets: + - name: {{.Values.privateRegistry}}{{ end }} \ No newline at end of file diff --git a/charts/presidio/templates/anonymizer-image-service.yaml b/charts/presidio/templates/anonymizer-image-service.yaml new file mode 100644 index 000000000..aa14d8c73 --- /dev/null +++ b/charts/presidio/templates/anonymizer-image-service.yaml @@ -0,0 +1,18 @@ +{{ $fullname := include "presidio.anonymizerimage.fullname" . 
}} +apiVersion: v1 +kind: Service +metadata: + name: {{ $fullname }} + labels: + chart: "{{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + type: {{ .Values.anonymizerimage.service.type }} + ports: + - port: {{ .Values.anonymizerimage.service.externalPort }} + targetPort: {{ .Values.anonymizerimage.service.internalPort }} + protocol: TCP + name: {{ .Values.anonymizerimage.service.name }} + selector: + app: {{ $fullname }} \ No newline at end of file diff --git a/charts/presidio/templates/api-deployment.yaml b/charts/presidio/templates/api-deployment.yaml index 59171180c..1f67feeec 100644 --- a/charts/presidio/templates/api-deployment.yaml +++ b/charts/presidio/templates/api-deployment.yaml @@ -54,6 +54,10 @@ spec: value: {{ template "presidio.analyzer.address" . }} - name: ANONYMIZER_SVC_ADDRESS value: {{ template "presidio.anonymizer.address" . }} + - name: ANONYMIZER_IMAGE_SVC_ADDRESS + value: {{ template "presidio.anonymizerimage.address" . }} + - name: OCR_SVC_ADDRESS + value: {{ template "presidio.ocr.address" . }} - name: SCHEDULER_SVC_ADDRESS value: {{ template "presidio.scheduler.address" . }} {{ if .Values.privateRegistry }}imagePullSecrets: diff --git a/charts/presidio/templates/ocr-deployment.yaml b/charts/presidio/templates/ocr-deployment.yaml new file mode 100644 index 000000000..70eb0ed32 --- /dev/null +++ b/charts/presidio/templates/ocr-deployment.yaml @@ -0,0 +1,35 @@ +{{ $fullname := include "presidio.ocr.fullname" . 
}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $fullname }} + labels: + app: {{ $fullname }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + replicas: 1 + selector: + matchLabels: + app: {{ $fullname }} + template: + metadata: + labels: + app: {{ $fullname }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.registry }}/{{ .Values.ocr.name }}:{{ default .Chart.AppVersion .Values.ocr.tag }}" + imagePullPolicy: {{ default "IfNotPresent" .Values.ocr.imagePullPolicy }} + ports: + - containerPort: {{ .Values.ocr.service.internalPort }} + env: + - name: PRESIDIO_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: GRPC_PORT + value: {{ .Values.ocr.service.internalPort | quote }} + {{ if .Values.privateRegistry }}imagePullSecrets: + - name: {{.Values.privateRegistry}}{{ end }} \ No newline at end of file diff --git a/charts/presidio/templates/ocr-service.yaml b/charts/presidio/templates/ocr-service.yaml new file mode 100644 index 000000000..6716f7f45 --- /dev/null +++ b/charts/presidio/templates/ocr-service.yaml @@ -0,0 +1,18 @@ +{{ $fullname := include "presidio.ocr.fullname" . 
}} +apiVersion: v1 +kind: Service +metadata: + name: {{ $fullname }} + labels: + chart: "{{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}" + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + type: {{ .Values.ocr.service.type }} + ports: + - port: {{ .Values.ocr.service.externalPort }} + targetPort: {{ .Values.ocr.service.internalPort }} + protocol: TCP + name: {{ .Values.ocr.service.name }} + selector: + app: {{ $fullname }} \ No newline at end of file diff --git a/charts/presidio/templates/scheduler-deployment.yaml b/charts/presidio/templates/scheduler-deployment.yaml index f99021ab3..9a5a8a581 100644 --- a/charts/presidio/templates/scheduler-deployment.yaml +++ b/charts/presidio/templates/scheduler-deployment.yaml @@ -45,6 +45,10 @@ spec: value: {{ template "presidio.analyzer.address" . }} - name: ANONYMIZER_SVC_ADDRESS value: {{ template "presidio.anonymizer.address" . }} + - name: ANONYMIZER_IMAGE_SVC_ADDRESS + value: {{ template "presidio.anonymizerimage.address" . }} + - name: OCR_SVC_ADDRESS + value: {{ template "presidio.ocr.address" . 
}} - name: DATASINK_GRPC_PORT value: "5000" - name: DATASINK_IMAGE_NAME diff --git a/charts/presidio/values.yaml b/charts/presidio/values.yaml index ce46afdeb..842b8ee1e 100644 --- a/charts/presidio/values.yaml +++ b/charts/presidio/values.yaml @@ -46,6 +46,24 @@ anonymizer: externalPort: 3001 internalPort: 3001 +anonymizerimage: + name: presidio-anonymizer-image + # tag: + imagePullPolicy: Always + service: + type: ClusterIP + externalPort: 3001 + internalPort: 3001 + +ocr: + name: presidio-ocr + # tag: + imagePullPolicy: Always + service: + type: ClusterIP + externalPort: 3001 + internalPort: 3001 + scheduler: name: presidio-scheduler imagePullPolicy: Always diff --git a/docs/_config.yml b/docs/_config.yml index c4192631f..941557bdb 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1 +1,5 @@ -theme: jekyll-theme-cayman \ No newline at end of file +theme: jekyll-theme-cayman +plugins: + - jekyll-seo-tag + - jekyll-sitemap + - jekyll-github-metadata \ No newline at end of file diff --git a/docs/design.md b/docs/design.md index 0be01a162..64fe344c2 100644 --- a/docs/design.md +++ b/docs/design.md @@ -5,7 +5,7 @@ presidio. It is a high-level explanation of the presidio design. ## Presidio As a Service - Kubernetes Deployment -![persidio-design](https://user-images.githubusercontent.com/17064840/45154654-f706aa80-b1e0-11e8-9ff6-3567f8892bd4.png) +![presidio-design](https://user-images.githubusercontent.com/17064840/50818154-dd13da80-132e-11e9-8b64-cdc4f3f6717d.png) This architecture gives us the following advantages: diff --git a/docs/development.md b/docs/development.md index 2cf8ef778..7dbe5f9c7 100644 --- a/docs/development.md +++ b/docs/development.md @@ -44,20 +44,22 @@ $ pip3 install cython ``` -7. Protobuf generator tools +7. Install [tesseract](https://github.com/tesseract-ocr/tesseract/wiki) OCR framework. + +8. Protobuf generator tools (Optional) - `https://github.com/golang/protobuf` - `https://grpc.io/docs/tutorials/basic/python.html` -8. 
To generate proto files, clone [presidio-genproto](https://github.com/Microsoft/presidio-genproto) and run the following commands in `$GOPATH/src/github.com/Microsoft/presidio-genproto/src` folder + To generate proto files, clone [presidio-genproto](https://github.com/Microsoft/presidio-genproto) and run the following commands in `$GOPATH/src/github.com/Microsoft/presidio-genproto/src` folder - ``` - python -m grpc_tools.protoc -I . --python_out=../python --grpc_python_out=../python ./*.proto + ```sh + $ python -m grpc_tools.protoc -I . --python_out=../python --grpc_python_out=../python ./*.proto ``` - ``` - protoc -I . --go_out=plugins=grpc:../golang ./*.proto + ```sh + $ protoc -I . --go_out=plugins=grpc:../golang ./*.proto ``` ## Development notes diff --git a/docs/install.md b/docs/install.md index 44d1470f2..5d4038658 100644 --- a/docs/install.md +++ b/docs/install.md @@ -50,7 +50,7 @@ $ docker run --rm --name presidio-api --network mynetwork -d -p 8080:8080 -e WEB ```sh # Based on the DOCKER_REGISTRY and PRESIDIO_LABEL from the previous steps - $ helm install --name presidio-demo --set registry=${DOCKER_REGISTRY},analyzer.tag=${PRESIDIO_LABEL},anonymizer.tag=${PRESIDIO_LABEL},scheduler.tag=${PRESIDIO_LABEL},api.tag=${PRESIDIO_LABEL},collector.tag=${PRESIDIO_LABEL},datasink.tag=${PRESIDIO_LABEL} . --namespace presidio + $ helm install --name presidio-demo --set registry=${DOCKER_REGISTRY},analyzer.tag=${PRESIDIO_LABEL},anonymizer.tag=${PRESIDIO_LABEL},anonymizerimage.tag=${PRESIDIO_LABEL},ocr.tag=${PRESIDIO_LABEL},scheduler.tag=${PRESIDIO_LABEL},api.tag=${PRESIDIO_LABEL},collector.tag=${PRESIDIO_LABEL},datasink.tag=${PRESIDIO_LABEL} . 
--namespace presidio ``` --- diff --git a/docs/overview.md b/docs/overview.md index 5442acdfa..72ae1632d 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -1,4 +1,8 @@ -# Presidio overview +# Presidio - Data Protection API + +**Context aware, pluggable and customizable data protection and PII anonymization service for text and images** + +## Description Presidio *(Origin from Latin praesidium ‘protection, garrison’)* helps to ensure sensitive text is properly managed and governed. It provides fast ***analytics*** and ***anonymization*** for sensitive text such as credit card numbers, bitcoin wallets, names, locations, social security numbers, US phone numbers and financial data. Presidio analyzes the text using predefined analyzers to identify patterns, formats, and checksums with relevant context. @@ -9,6 +13,14 @@ You can find a more detailed list [here](https://microsoft.github.io/presidio/fi ## Features +***Free text anonymization*** + +[![Image1](https://user-images.githubusercontent.com/17064840/50557166-2048ca80-0ceb-11e9-9153-d39a3f507d32.png)](https://user-images.githubusercontent.com/17064840/50557166-2048ca80-0ceb-11e9-9153-d39a3f507d32.png) + +***Text anonymization in images*** + +[![Image2](https://user-images.githubusercontent.com/17064840/50557215-bc72d180-0ceb-11e9-8c92-4fbc01bbcb2a.png)](https://user-images.githubusercontent.com/17064840/50557215-bc72d180-0ceb-11e9-8c92-4fbc01bbcb2a.png) + * Text analytics - Predefined analyzers with customizable fields. * Probability scores - Customize the sensitive text detection threshold. 
* Anonymization - Anonymize sensitive text and images diff --git a/docs/tutorial_scheduler_cronjob.md b/docs/tutorial_scheduler_cronjob.md index 318cd6f65..cd6317656 100644 --- a/docs/tutorial_scheduler_cronjob.md +++ b/docs/tutorial_scheduler_cronjob.md @@ -3,9 +3,7 @@ When running Presidio on a Kubernetes cluster you can set a Kubernetes [CronJob](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/) to scan your data periodicly. You will need to configure the scan's input and the destination to which the analyzed and anonymized results will be stored. -![Design](https://user-images.githubusercontent.com/13463870/43763824-70493396-9a34-11e8-9aa7-090057012369.jpg) - -* A detailed design of the Ingerss Control and the API Serivce can be found [here](./design.md). +* A detailed design of the Ingress Control and the API Service can be found [here](./design.md). ## Job stages diff --git a/docs/tutorial_service.md b/docs/tutorial_service.md index da29d009e..f35c2eb75 100644 --- a/docs/tutorial_service.md +++ b/docs/tutorial_service.md @@ -43,4 +43,16 @@ You can also create reusable templates 2. Anonymize text ```sh echo -n '{"text":"my phone number is 057-555-2323 and my credit card is 4961-2765-5327-5913", "AnalyzeTemplateId":"", "AnonymizeTemplateId":"" }' | http /api/v1/projects//anonymize + ``` + +***Sample 5 (Image anonymization)*** + +1. Create an anonymizer image template (This template redacts values with black color) + ```sh + echo -n '{"fieldTypeGraphics":[{"graphic":{"fillColorValue":{"blue":0,"red":0,"green":0}}}]}' | http /api/v1/templates//anonymize-image/ + ``` + +2. 
Anonymize image + ```sh + http -f POST /api/v1/projects//anonymize-image detectionType='OCR' analyzeTemplateId='' anonymizeTemplateId='' imageType='image/png' file@~/test-ocr.png > test-output.png ``` \ No newline at end of file diff --git a/pkg/platform/platform.go b/pkg/platform/platform.go index 5b86e1056..2357e563e 100644 --- a/pkg/platform/platform.go +++ b/pkg/platform/platform.go @@ -47,25 +47,27 @@ func ConvertPullPolicyStringToType(pullPolicy string) apiv1.PullPolicy { //Settings from all services type Settings struct { - WebPort int - GrpcPort int - DatasinkGrpcPort int - Namespace string - AnalyzerSvcAddress string - AnonymizerSvcAddress string - SchedulerSvcAddress string - RedisURL string - RedisPassword string - RedisDB int - RedisSSL bool - DatasinkImage string - CollectorImage string - DatasinkImagePullPolicy string - CollectorImagePullPolicy string - ScannerRequest string - StreamRequest string - QueueURL string - LogLevel string + WebPort int + GrpcPort int + DatasinkGrpcPort int + Namespace string + AnalyzerSvcAddress string + AnonymizerSvcAddress string + AnonymizerImageSvcAddress string + OcrSvcAddress string + SchedulerSvcAddress string + RedisURL string + RedisPassword string + RedisDB int + RedisSSL bool + DatasinkImage string + CollectorImage string + DatasinkImagePullPolicy string + CollectorImagePullPolicy string + ScannerRequest string + StreamRequest string + QueueURL string + LogLevel string } //WebPort for http server @@ -86,6 +88,12 @@ const AnalyzerSvcAddress = "analyzer_svc_address" //AnonymizerSvcAddress anonymizer service address const AnonymizerSvcAddress = "anonymizer_svc_address" +//AnonymizerImageSvcAddress anonymizer image service address +const AnonymizerImageSvcAddress = "anonymizer_image_svc_address" + +//OcrSvcAddress ocr service address +const OcrSvcAddress = "ocr_svc_address" + //SchedulerSvcAddress scheduler service address const SchedulerSvcAddress = "scheduler_svc_address" @@ -131,25 +139,27 @@ func GetSettings() 
*Settings { viper.AutomaticEnv() settings := Settings{ - WebPort: viper.GetInt(strings.ToUpper(WebPort)), - GrpcPort: viper.GetInt(strings.ToUpper(GrpcPort)), - DatasinkGrpcPort: viper.GetInt(strings.ToUpper(DatasinkGrpcPort)), - Namespace: getTrimmedEnv(PresidioNamespace), - AnalyzerSvcAddress: getTrimmedEnv(AnalyzerSvcAddress), - AnonymizerSvcAddress: getTrimmedEnv(AnonymizerSvcAddress), - SchedulerSvcAddress: getTrimmedEnv(SchedulerSvcAddress), - RedisURL: getTrimmedEnv(RedisURL), - RedisDB: viper.GetInt(strings.ToUpper(RedisDb)), - RedisSSL: viper.GetBool(strings.ToUpper(RedisSSL)), - RedisPassword: getTrimmedEnv(RedisPassword), - DatasinkImage: getTrimmedEnv(DatasinkImageName), - CollectorImage: getTrimmedEnv(CollectorImageName), - DatasinkImagePullPolicy: getTrimmedEnv(DatasinkImagePullPolicy), - CollectorImagePullPolicy: getTrimmedEnv(CollectorImagePullPolicy), - ScannerRequest: getTrimmedEnv(ScannerRequest), - StreamRequest: getTrimmedEnv(StreamRequest), - QueueURL: getTrimmedEnv(QueueURL), - LogLevel: getTrimmedEnv(LogLevel), + WebPort: viper.GetInt(strings.ToUpper(WebPort)), + GrpcPort: viper.GetInt(strings.ToUpper(GrpcPort)), + DatasinkGrpcPort: viper.GetInt(strings.ToUpper(DatasinkGrpcPort)), + Namespace: getTrimmedEnv(PresidioNamespace), + AnalyzerSvcAddress: getTrimmedEnv(AnalyzerSvcAddress), + AnonymizerSvcAddress: getTrimmedEnv(AnonymizerSvcAddress), + AnonymizerImageSvcAddress: getTrimmedEnv(AnonymizerImageSvcAddress), + OcrSvcAddress: getTrimmedEnv(OcrSvcAddress), + SchedulerSvcAddress: getTrimmedEnv(SchedulerSvcAddress), + RedisURL: getTrimmedEnv(RedisURL), + RedisDB: viper.GetInt(strings.ToUpper(RedisDb)), + RedisSSL: viper.GetBool(strings.ToUpper(RedisSSL)), + RedisPassword: getTrimmedEnv(RedisPassword), + DatasinkImage: getTrimmedEnv(DatasinkImageName), + CollectorImage: getTrimmedEnv(CollectorImageName), + DatasinkImagePullPolicy: getTrimmedEnv(DatasinkImagePullPolicy), + CollectorImagePullPolicy: getTrimmedEnv(CollectorImagePullPolicy), + 
ScannerRequest: getTrimmedEnv(ScannerRequest), + StreamRequest: getTrimmedEnv(StreamRequest), + QueueURL: getTrimmedEnv(QueueURL), + LogLevel: getTrimmedEnv(LogLevel), } return &settings diff --git a/pkg/presidio/presidio.go b/pkg/presidio/presidio.go index e624b2d18..c0e864229 100644 --- a/pkg/presidio/presidio.go +++ b/pkg/presidio/presidio.go @@ -3,6 +3,7 @@ package presidio import ( "context" "encoding/json" + "fmt" types "github.com/Microsoft/presidio-genproto/golang" "github.com/Microsoft/presidio/pkg/cache" @@ -12,11 +13,18 @@ import ( type ServicesAPI interface { SetupAnalyzerService() SetupAnonymizerService() + SetupAnonymizerImageService() + SetupOCRService() SetupSchedulerService() SetupDatasinkService() SetupCache() cache.Cache AnalyzeItem(ctx context.Context, text string, template *types.AnalyzeTemplate) ([]*types.AnalyzeResult, error) - AnonymizeItem(ctx context.Context, analyzeResults []*types.AnalyzeResult, text string, anonymizeTemplate *types.AnonymizeTemplate) (*types.AnonymizeResponse, error) + AnonymizeItem(ctx context.Context, analyzeResults []*types.AnalyzeResult, text string, + anonymizeTemplate *types.AnonymizeTemplate) (*types.AnonymizeResponse, error) + AnonymizeImageItem(ctx context.Context, image *types.Image, analyzeResults []*types.AnalyzeResult, + anonymizeImageTypeEnum types.DetectionTypeEnum, + anonymizeImageTemplate *types.AnonymizeImageTemplate) (*types.AnonymizeImageResponse, error) + OcrItem(ctx context.Context, image *types.Image) (*types.OcrResponse, error) SendResultToDatasink(ctx context.Context, analyzeResults []*types.AnalyzeResult, anonymizeResults *types.AnonymizeResponse, path string) error ApplyStream(ctx context.Context, streamsJobRequest *types.StreamsJobRequest) (*types.StreamsJobResponse, error) @@ -51,6 +59,9 @@ type Item interface { // ConvertJSONToInterface convert Json to go Interface func ConvertJSONToInterface(template string, convertTo interface{}) error { + if template == "" { + return fmt.Errorf("template 
is empty") + } err := json.Unmarshal([]byte(template), &convertTo) return err } diff --git a/pkg/presidio/services/services.go b/pkg/presidio/services/services.go index 9e350268c..3df5eb9c1 100644 --- a/pkg/presidio/services/services.go +++ b/pkg/presidio/services/services.go @@ -17,11 +17,13 @@ import ( //Services exposes GRPC services type Services struct { - AnalyzerService types.AnalyzeServiceClient - AnonymizeService types.AnonymizeServiceClient - DatasinkService types.DatasinkServiceClient - SchedulerService types.SchedulerServiceClient - Settings *platform.Settings + AnalyzerService types.AnalyzeServiceClient + AnonymizeService types.AnonymizeServiceClient + AnonymizeImageService types.AnonymizeImageServiceClient + OcrService types.OcrServiceClient + DatasinkService types.DatasinkServiceClient + SchedulerService types.SchedulerServiceClient + Settings *platform.Settings } //New services with settings @@ -60,6 +62,40 @@ func (services *Services) SetupAnonymizerService() { } +//SetupAnonymizerImageService GRPC connection +func (services *Services) SetupAnonymizerImageService() { + + if services.Settings.AnonymizerImageSvcAddress == "" { + log.Warn("anonymizer image service address is empty") + return + } + + anonymizeImageService, err := rpc.SetupAnonymizeImageService(services.Settings.AnonymizerImageSvcAddress) + if err != nil { + log.Fatal("Connection to anonymizer image service failed %q", err) + } + + services.AnonymizeImageService = anonymizeImageService + +} + +//SetupOCRService GRPC connection +func (services *Services) SetupOCRService() { + + if services.Settings.OcrSvcAddress == "" { + log.Warn("ocr service address is empty") + return + } + + ocrService, err := rpc.SetupOcrService(services.Settings.OcrSvcAddress) + if err != nil { + log.Fatal("Connection to ocr service failed %q", err) + } + + services.OcrService = ocrService + +} + //SetupSchedulerService GRPC connection func (services *Services) SetupSchedulerService() { @@ -70,7 +106,7 @@ func 
(services *Services) SetupSchedulerService() { schedulerService, err := rpc.SetupSchedulerService(services.Settings.SchedulerSvcAddress) if err != nil { - log.Fatal("Connection to anonymizer service failed %q", err) + log.Fatal("Connection to scheduler service failed %q", err) } services.SchedulerService = schedulerService @@ -118,7 +154,9 @@ func (services *Services) AnalyzeItem(ctx context.Context, text string, template } //AnonymizeItem - anonymize text -func (services *Services) AnonymizeItem(ctx context.Context, analyzeResults []*types.AnalyzeResult, text string, anonymizeTemplate *types.AnonymizeTemplate) (*types.AnonymizeResponse, error) { +func (services *Services) AnonymizeItem(ctx context.Context, analyzeResults []*types.AnalyzeResult, + text string, anonymizeTemplate *types.AnonymizeTemplate) (*types.AnonymizeResponse, error) { + if anonymizeTemplate != nil { anonymizeRequest := &types.AnonymizeRequest{ @@ -126,8 +164,39 @@ func (services *Services) AnonymizeItem(ctx context.Context, analyzeResults []*t Text: text, AnalyzeResults: analyzeResults, } - res, err := services.AnonymizeService.Apply(ctx, anonymizeRequest) - return res, err + return services.AnonymizeService.Apply(ctx, anonymizeRequest) + } + return nil, nil +} + +//AnonymizeImageItem - anonymize image +func (services *Services) AnonymizeImageItem(ctx context.Context, image *types.Image, analyzeResults []*types.AnalyzeResult, + detectionType types.DetectionTypeEnum, + anonymizeImageTemplate *types.AnonymizeImageTemplate) (*types.AnonymizeImageResponse, error) { + + if anonymizeImageTemplate != nil { + + anonymizeImageRequest := &types.AnonymizeImageRequest{ + Image: image, + Template: anonymizeImageTemplate, + DetectionType: detectionType, + AnalyzeResults: analyzeResults, + } + return services.AnonymizeImageService.Apply(ctx, anonymizeImageRequest) + + } + return nil, nil +} + +//OcrItem - ocr image +func (services *Services) OcrItem(ctx context.Context, image *types.Image) 
(*types.OcrResponse, error) { + + if image.Data != nil { + + ocrRequest := &types.OcrRequest{ + Image: image, + } + return services.OcrService.Apply(ctx, ocrRequest) } return nil, nil } diff --git a/pkg/presidio/templates/templates.go b/pkg/presidio/templates/templates.go index 0ef16b622..d299b7ebd 100644 --- a/pkg/presidio/templates/templates.go +++ b/pkg/presidio/templates/templates.go @@ -27,14 +27,20 @@ func New(s platform.Store, c cache.Cache) presidio.TemplatesStore { } // createKey creates template key in the structure: project/action/id -func createKey(project string, action string, id string) string { +func createKey(project, action, id string) (string, error) { + if project == "" || action == "" || id == "" { + return "", fmt.Errorf("Invalid key") + } key := fmt.Sprintf("%s%s%s%s%s", project, separator, action, separator, id) - return key + return key, nil } // GetTemplate from key store -func (templates *Templates) GetTemplate(project string, action string, id string) (string, error) { - key := createKey(project, action, id) +func (templates *Templates) GetTemplate(project, action, id string) (string, error) { + key, err := createKey(project, action, id) + if err != nil { + return "", err + } if templates.cacheStore != nil { res, err := templates.cacheStore.Get(key) if res != "" && err == nil { @@ -45,8 +51,11 @@ func (templates *Templates) GetTemplate(project string, action string, id string } // InsertTemplate inserts a template to the key store -func (templates *Templates) InsertTemplate(project string, action string, id string, value string) error { - key := createKey(project, action, id) +func (templates *Templates) InsertTemplate(project, action, id, value string) error { + key, err := createKey(project, action, id) + if err != nil { + return err + } if templates.cacheStore != nil { err := templates.cacheStore.Set(key, value) if err != nil { @@ -57,8 +66,11 @@ func (templates *Templates) InsertTemplate(project string, action string, id str } // 
UpdateTemplate updates the template in the key store -func (templates *Templates) UpdateTemplate(project string, action string, id string, value string) error { - key := createKey(project, action, id) +func (templates *Templates) UpdateTemplate(project, action, id, value string) error { + key, err := createKey(project, action, id) + if err != nil { + return err + } if templates.cacheStore != nil { err := templates.cacheStore.Delete(key) @@ -70,7 +82,7 @@ func (templates *Templates) UpdateTemplate(project string, action string, id str log.Error(err.Error()) } } - err := templates.platformStore.DeleteKVPair(key) + err = templates.platformStore.DeleteKVPair(key) if err != nil { return err } @@ -78,8 +90,11 @@ func (templates *Templates) UpdateTemplate(project string, action string, id str } // DeleteTemplate deletes a template from key store -func (templates *Templates) DeleteTemplate(project string, action string, id string) error { - key := createKey(project, action, id) +func (templates *Templates) DeleteTemplate(project, action, id string) error { + key, err := createKey(project, action, id) + if err != nil { + return err + } if templates.cacheStore != nil { err := templates.cacheStore.Delete(key) if err != nil { diff --git a/pkg/rpc/client.go b/pkg/rpc/client.go index e2d2712ef..4833c0590 100644 --- a/pkg/rpc/client.go +++ b/pkg/rpc/client.go @@ -49,6 +49,30 @@ func SetupAnonymizeService(address string) (types.AnonymizeServiceClient, error) return client, nil } +//SetupAnonymizeImageService connect to anonymizer service with GRPC +func SetupAnonymizeImageService(address string) (types.AnonymizeImageServiceClient, error) { + + conn, err := connect(address) + if err != nil { + return nil, err + } + + client := types.NewAnonymizeImageServiceClient(conn) + return client, nil +} + +//SetupOcrService connect to anonymizer service with GRPC +func SetupOcrService(address string) (types.OcrServiceClient, error) { + + conn, err := connect(address) + if err != nil { + 
return nil, err + } + + client := types.NewOcrServiceClient(conn) + return client, nil +} + //SetupAnalyzerService connect to analyzer service with GRPC func SetupAnalyzerService(address string) (types.AnalyzeServiceClient, error) { conn, err := connect(address) diff --git a/pkg/server/server.go b/pkg/server/server.go index bfa8023d1..f320edf42 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -28,13 +28,14 @@ func Setup(_port int, loglevel string) *gin.Engine { if _port == 0 { _port = 8080 } - r := gin.New() - r.Use(gin.Recovery()) if strings.ToLower(loglevel) != "debug" { gin.SetMode(gin.ReleaseMode) } + r := gin.New() + r.Use(gin.Recovery()) + // Add a ginzap middleware, which: // - Logs all requests, like a combined access and error log. // - Logs to stdout. diff --git a/presidio-analyzer/analyzer/__main__.py b/presidio-analyzer/analyzer/__main__.py index 9e88cbc13..bd23ed07a 100644 --- a/presidio-analyzer/analyzer/__main__.py +++ b/presidio-analyzer/analyzer/__main__.py @@ -31,7 +31,7 @@ helps['serve'] = """ short-summary: Create a GRPC server - - presidio-analyzer serve --grpc_port 3000 + - presidio-analyzer serve --grpc-port 3000 """ helps['analyze'] = """ @@ -65,7 +65,7 @@ def Apply(self, request, context): return response -def serve_command_handler(env_grpc_port=False, grpc_port=3001): +def serve_command_handler(env_grpc_port=False, grpc_port=3000): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) analyze_pb2_grpc.add_AnalyzeServiceServicer_to_server(Analyzer(), server) diff --git a/presidio-anonymizer-image/Dockerfile b/presidio-anonymizer-image/Dockerfile new file mode 100644 index 000000000..33aef58dd --- /dev/null +++ b/presidio-anonymizer-image/Dockerfile @@ -0,0 +1,19 @@ +ARG REGISTRY=presidio.azurecr.io + +FROM ${REGISTRY}/presidio-golang-base AS build-env + +ARG NAME=presidio-anonymizer-image +ARG PRESIDIOPATH=${GOPATH}/src/github.com/Microsoft/presidio +ARG VERSION=latest + +WORKDIR ${PRESIDIOPATH}/${NAME}/cmd/${NAME} 
+RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 && go build -ldflags '-X github.com/Microsoft/presidio/pkg/version.Version=${VERSION}' -o /usr/bin/${NAME} + +#---------------------------- + +FROM alpine:3.8 + +ARG NAME=presidio-anonymizer-image +WORKDIR /usr/bin/ +COPY --from=build-env /usr/bin/${NAME} /usr/bin/ +CMD /usr/bin/presidio-anonymizer-image \ No newline at end of file diff --git a/presidio-anonymizer-image/cmd/presidio-anonymizer-image/anonymizer/anonymizer.go b/presidio-anonymizer-image/cmd/presidio-anonymizer-image/anonymizer/anonymizer.go new file mode 100644 index 000000000..462b80453 --- /dev/null +++ b/presidio-anonymizer-image/cmd/presidio-anonymizer-image/anonymizer/anonymizer.go @@ -0,0 +1,116 @@ +package anonymizer + +import ( + "bytes" + "fmt" + img "image" + "image/color" + "strings" + + "github.com/disintegration/imaging" + + types "github.com/Microsoft/presidio-genproto/golang" +) + +//AnonymizeImage text or just bounding boxes +func AnonymizeImage(image *types.Image, detectionType types.DetectionTypeEnum, results []*types.AnalyzeResult, template *types.AnonymizeImageTemplate) (*types.Image, error) { + + // Get format + if image.ImageType == "" { + return nil, fmt.Errorf("Image type is empty") + } + + splitted := strings.Split(image.ImageType, "/") + var f string + if len(splitted) == 2 { + f = splitted[1] + } else { + f = splitted[0] + } + + format, err := imaging.FormatFromExtension(f) + if err != nil { + return nil, err + } + + // Read byte slice + r := bytes.NewReader(image.Data) + decodedImage, err := imaging.Decode(r) + + if err != nil { + return nil, err + } + + // Redact text + if detectionType == types.DetectionTypeEnum_OCR { + decodedImage = redactText(decodedImage, image, results, template) + } else { + return nil, fmt.Errorf("Detection method not supported") + } + + // Save image + buf := new(bytes.Buffer) + err = imaging.Encode(buf, decodedImage, format) + if err != nil { + return nil, err + } + + return &types.Image{Data: 
buf.Bytes()}, nil +} + +func redactText(dimg img.Image, image *types.Image, results []*types.AnalyzeResult, template *types.AnonymizeImageTemplate) img.Image { + + for _, result := range results { + if result == nil { + continue + } + + location := result.Location + + for _, bbox := range image.Boundingboxes { + + for _, graphic := range template.FieldTypeGraphics { + //All fields will be redacted + if graphic.Fields == nil { + dimg = fillBbox(dimg, bbox, location, graphic) + break + } + //Specified fields will be redacted + for _, fieldType := range graphic.Fields { + if fieldType.Name == result.Field.Name { + dimg = fillBbox(dimg, bbox, location, graphic) + break + } + + } + } + } + } + return dimg +} + +func fillBbox(dimg img.Image, bbox *types.Boundingbox, location *types.Location, graphic *types.FieldTypeGraphic) img.Image { + + var col color.NRGBA + if graphic.Graphic != nil && graphic.Graphic.FillColorValue != nil { + col = color.NRGBA{ + (uint8)(graphic.Graphic.FillColorValue.Red), + (uint8)(graphic.Graphic.FillColorValue.Green), + (uint8)(graphic.Graphic.FillColorValue.Blue), + 255, + } + } else { + col = color.NRGBA{0, 0, 0, 255} // Black + } + + if (bbox.StartPosition >= location.Start && bbox.EndPosition <= location.End+1) || (location.Start >= bbox.StartPosition && location.End <= bbox.EndPosition) { + x := int(bbox.XLocation) + y := int(bbox.YLocation) + w := int(bbox.Width) + h := int(bbox.Height) + + dst := imaging.New(w-x, h-y, col) + dimg = imaging.Paste(dimg, dst, img.Pt(x, y)) + } + return dimg +} diff --git a/presidio-anonymizer-image/cmd/presidio-anonymizer-image/main.go b/presidio-anonymizer-image/cmd/presidio-anonymizer-image/main.go new file mode 100644 index 000000000..672394cff --- /dev/null +++ b/presidio-anonymizer-image/cmd/presidio-anonymizer-image/main.go @@ -0,0 +1,51 @@ +package main + +import ( + "flag" + + context "golang.org/x/net/context" + "google.golang.org/grpc/reflection" + + types 
"github.com/Microsoft/presidio-genproto/golang" + + "github.com/spf13/pflag" + "github.com/spf13/viper" + + log "github.com/Microsoft/presidio/pkg/logger" + "github.com/Microsoft/presidio/pkg/platform" + "github.com/Microsoft/presidio/pkg/rpc" + "github.com/Microsoft/presidio/presidio-anonymizer-image/cmd/presidio-anonymizer-image/anonymizer" +) + +type server struct{} + +func main() { + + pflag.Int(platform.GrpcPort, 3002, "GRPC listen port") + pflag.String("log_level", "info", "Log level - debug/info/warn/error") + + pflag.CommandLine.AddGoFlagSet(flag.CommandLine) + pflag.Parse() + viper.BindPFlags(pflag.CommandLine) + + settings := platform.GetSettings() + log.CreateLogger(settings.LogLevel) + + lis, s := rpc.SetupClient(settings.GrpcPort) + + types.RegisterAnonymizeImageServiceServer(s, &server{}) + reflection.Register(s) + if err := s.Serve(lis); err != nil { + log.Fatal(err.Error()) + } + +} + +func (s *server) Apply(ctx context.Context, r *types.AnonymizeImageRequest) (*types.AnonymizeImageResponse, error) { + + res, err := anonymizer.AnonymizeImage(r.Image, r.DetectionType, r.AnalyzeResults, r.Template) + if err != nil { + log.Error(err.Error()) + } + return &types.AnonymizeImageResponse{Image: res}, err +} diff --git a/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer.go b/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer.go index 5a6a6dec7..eec0f9131 100644 --- a/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer.go +++ b/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer.go @@ -23,8 +23,8 @@ func (a sortedResults) Less(i, j int) bool { return a[i].Score > a[j].Score } -//ApplyAnonymizerTemplate ... -func ApplyAnonymizerTemplate(text string, results []*types.AnalyzeResult, template *types.AnonymizeTemplate) (string, error) { +//AnonymizeText ... 
+func AnonymizeText(text string, results []*types.AnalyzeResult, template *types.AnonymizeTemplate) (string, error) { //Sort results by start location to verify order sort.Sort(sortedResults(results)) diff --git a/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer_test.go b/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer_test.go index 2bb885bfd..c73320197 100644 --- a/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer_test.go +++ b/presidio-anonymizer/cmd/presidio-anonymizer/anonymizer/anonymizer_test.go @@ -263,7 +263,7 @@ func TestPlan(t *testing.T) { anonymizerTemplate := types.AnonymizeTemplate{ FieldTypeTransformations: plan.fieldTypeTransformation, } - output, err := ApplyAnonymizerTemplate(plan.text, plan.analyzeResults, &anonymizerTemplate) + output, err := AnonymizeText(plan.text, plan.analyzeResults, &anonymizerTemplate) assert.NoError(t, err) assert.Equal(t, plan.expected, output) } @@ -304,7 +304,7 @@ PR appropriately (e.g., label, comment). Simply follow the instructions provided This project has adopted the Microsoft Open Source Code of Conduct. 
For more information see the Code of Conduct FAQ or contact opencode@microsoft.com with any additional questions or comments.` - output, err := ApplyAnonymizerTemplate(text, analyzeResults, &anonymizerTemplate) + output, err := AnonymizeText(text, analyzeResults, &anonymizerTemplate) assert.NoError(t, err) expected := `Here are a few examples of entities we currently support: diff --git a/presidio-anonymizer/cmd/presidio-anonymizer/main.go b/presidio-anonymizer/cmd/presidio-anonymizer/main.go index ee6e71a11..4ba8b981c 100644 --- a/presidio-anonymizer/cmd/presidio-anonymizer/main.go +++ b/presidio-anonymizer/cmd/presidio-anonymizer/main.go @@ -42,7 +42,7 @@ func main() { } func (s *server) Apply(ctx context.Context, r *types.AnonymizeRequest) (*types.AnonymizeResponse, error) { - res, err := anonymizer.ApplyAnonymizerTemplate(r.Text, r.AnalyzeResults, r.Template) + res, err := anonymizer.AnonymizeText(r.Text, r.AnalyzeResults, r.Template) log.Debug(res) if err != nil { log.Error(err.Error()) diff --git a/presidio-api/cmd/presidio-api/actions.go b/presidio-api/cmd/presidio-api/actions.go deleted file mode 100644 index 5df18855e..000000000 --- a/presidio-api/cmd/presidio-api/actions.go +++ /dev/null @@ -1,264 +0,0 @@ -package main - -import ( - "fmt" - "net/http" - - "github.com/gin-gonic/gin" - - types "github.com/Microsoft/presidio-genproto/golang" - - "github.com/Microsoft/presidio/pkg/presidio" - server "github.com/Microsoft/presidio/pkg/server" -) - -func (api *API) setupGRPCServices() { - api.Services.SetupAnalyzerService() - api.Services.SetupAnonymizerService() - api.Services.SetupSchedulerService() -} - -func (api *API) analyze(c *gin.Context) { - var analyzeAPIRequest types.AnalyzeApiRequest - - if c.Bind(&analyzeAPIRequest) == nil { - analyzeTemplate := api.getAnalyzeTemplate(analyzeAPIRequest.AnalyzeTemplateId, analyzeAPIRequest.AnalyzeTemplate, c.Param("project"), c) - if analyzeTemplate == nil { - return - } - - res, err := api.Services.AnalyzeItem(c, 
analyzeAPIRequest.Text, analyzeTemplate) - if err != nil { - server.AbortWithError(c, http.StatusInternalServerError, err) - return - } - if res == nil { - return - } - server.WriteResponse(c, http.StatusOK, res) - } -} - -func (api *API) anonymize(c *gin.Context) { - var anonymizeAPIRequest types.AnonymizeApiRequest - - if c.Bind(&anonymizeAPIRequest) == nil { - project := c.Param("project") - - analyzeTemplate := api.getAnalyzeTemplate(anonymizeAPIRequest.AnalyzeTemplateId, anonymizeAPIRequest.AnalyzeTemplate, project, c) - if analyzeTemplate == nil { - return - } - - anonymizeTemplate := api.getAnonymizeTemplate(anonymizeAPIRequest.AnonymizeTemplateId, anonymizeAPIRequest.AnonymizeTemplate, project, c) - if anonymizeTemplate == nil { - return - } - - analyzeRes, err := api.Services.AnalyzeItem(c, anonymizeAPIRequest.Text, analyzeTemplate) - if err != nil { - server.AbortWithError(c, http.StatusInternalServerError, err) - return - } else if analyzeRes == nil { - return - } - - anonymizeRes, err := api.Services.AnonymizeItem(c, analyzeRes, anonymizeAPIRequest.Text, anonymizeTemplate) - if err != nil { - server.AbortWithError(c, http.StatusInternalServerError, err) - return - } else if anonymizeRes == nil { - return - } - server.WriteResponse(c, http.StatusOK, anonymizeRes) - } -} - -func (api *API) scheduleScannerCronJob(c *gin.Context) { - var cronAPIJobRequest types.ScannerCronJobApiRequest - - if c.Bind(&cronAPIJobRequest) == nil { - project := c.Param("project") - scannerCronJobRequest := api.getScannerCronJobRequest(&cronAPIJobRequest, project, c) - if scannerCronJobRequest == nil { - return - } - scheulderResponse := api.invokeScannerCronJobScheduler(scannerCronJobRequest, c) - if scheulderResponse == nil { - return - } - - server.WriteResponse(c, http.StatusOK, scheulderResponse) - } -} - -func (api *API) invokeScannerCronJobScheduler(scannerCronJobRequest *types.ScannerCronJobRequest, c *gin.Context) *types.ScannerCronJobResponse { - - res, err := 
api.Services.ApplyScan(c, scannerCronJobRequest) - if err != nil { - server.AbortWithError(c, http.StatusInternalServerError, err) - return nil - } - return res -} - -func (api *API) getScannerCronJobRequest(cronJobAPIRequest *types.ScannerCronJobApiRequest, project string, c *gin.Context) *types.ScannerCronJobRequest { - scanRequest := &types.ScanRequest{} - trigger := &types.Trigger{} - var name string - - if cronJobAPIRequest.ScannerCronJobTemplateId != "" { - cronJobTemplate := &types.ScannerCronJobTemplate{} - api.getTemplate(project, scheduleScannerCronJob, cronJobAPIRequest.ScannerCronJobTemplateId, cronJobTemplate, c) - - scanID := cronJobTemplate.ScanTemplateId - scanTemplate := &types.ScanTemplate{} - api.getTemplate(project, scan, scanID, scanTemplate, c) - - datasinkTemplate := &types.DatasinkTemplate{} - api.getTemplate(project, datasink, cronJobTemplate.DatasinkTemplateId, datasinkTemplate, c) - - analyzeTemplate := &types.AnalyzeTemplate{} - api.getTemplate(project, analyze, cronJobTemplate.AnalyzeTemplateId, analyzeTemplate, c) - - anonymizeTemplate := &types.AnonymizeTemplate{} - if cronJobTemplate.AnonymizeTemplateId != "" { - api.getTemplate(project, anonymize, cronJobTemplate.AnonymizeTemplateId, anonymizeTemplate, c) - } - trigger = cronJobTemplate.GetTrigger() - name = cronJobTemplate.GetName() - scanRequest = &types.ScanRequest{ - AnalyzeTemplate: analyzeTemplate, - AnonymizeTemplate: anonymizeTemplate, - DatasinkTemplate: datasinkTemplate, - ScanTemplate: scanTemplate, - } - } else if cronJobAPIRequest.ScannerCronJobRequest != nil { - scanRequest = cronJobAPIRequest.ScannerCronJobRequest.GetScanRequest() - trigger = cronJobAPIRequest.ScannerCronJobRequest.GetTrigger() - name = cronJobAPIRequest.ScannerCronJobRequest.GetName() - } else { - server.AbortWithError(c, http.StatusBadRequest, fmt.Errorf("ScannerCronJobTemplateId or ScannerCronJobRequest must be supplied")) - return nil - } - - return &types.ScannerCronJobRequest{ - Trigger: 
trigger, - ScanRequest: scanRequest, - Name: name, - } -} - -func (api *API) scheduleStreamsJob(c *gin.Context) { - var streamsJobRequest types.StreamsJobApiRequest - - if c.Bind(&streamsJobRequest) == nil { - project := c.Param("project") - streamsJobRequest := api.getStreamsJobRequest(&streamsJobRequest, project, c) - if streamsJobRequest == nil { - return - } - scheulderResponse := api.invokeStreamsJobScheduler(streamsJobRequest, c) - if scheulderResponse == nil { - return - } - - server.WriteResponse(c, http.StatusOK, scheulderResponse) - } -} - -func (api *API) invokeStreamsJobScheduler(streamsJobRequest *types.StreamsJobRequest, c *gin.Context) *types.StreamsJobResponse { - res, err := api.Services.ApplyStream(c, streamsJobRequest) - if err != nil { - server.AbortWithError(c, http.StatusInternalServerError, err) - return nil - } - return res -} - -func (api *API) getStreamsJobRequest(jobAPIRequest *types.StreamsJobApiRequest, project string, c *gin.Context) *types.StreamsJobRequest { - streamsJobRequest := &types.StreamsJobRequest{} - - if jobAPIRequest.GetStreamsJobTemplateId() != "" { - jobTemplate := &types.StreamsJobTemplate{} - api.getTemplate(project, scheduleStreamsJob, jobAPIRequest.StreamsJobTemplateId, jobTemplate, c) - - if jobTemplate == nil { - return nil - } - streamID := jobTemplate.GetStreamsTemplateId() - streamTemplate := &types.StreamTemplate{} - api.getTemplate(project, stream, streamID, streamTemplate, c) - - datasinkTemplate := &types.DatasinkTemplate{} - api.getTemplate(project, datasink, jobTemplate.GetDatasinkTemplateId(), datasinkTemplate, c) - - analyzeTemplate := &types.AnalyzeTemplate{} - api.getTemplate(project, analyze, jobTemplate.GetAnalyzeTemplateId(), analyzeTemplate, c) - - anonymizeTemplate := &types.AnonymizeTemplate{} - if jobTemplate.AnonymizeTemplateId != "" { - api.getTemplate(project, anonymize, jobTemplate.GetAnonymizeTemplateId(), anonymizeTemplate, c) - } - - if streamTemplate == nil || datasinkTemplate == nil || 
analyzeTemplate == nil || anonymizeTemplate == nil { - return nil - } - streamsJobRequest = &types.StreamsJobRequest{ - Name: streamTemplate.GetName(), - StreamsRequest: &types.StreamRequest{ - AnalyzeTemplate: analyzeTemplate, - AnonymizeTemplate: anonymizeTemplate, - DatasinkTemplate: datasinkTemplate, - StreamConfig: streamTemplate.GetStreamConfig(), - }, - } - } else if jobAPIRequest.GetStreamsJobRequest() != nil { - streamsJobRequest = jobAPIRequest.GetStreamsJobRequest() - } else { - server.AbortWithError(c, http.StatusBadRequest, fmt.Errorf("StreamsJobTemplateId or StreamsRequest must be supplied")) - return nil - } - - return streamsJobRequest -} - -func (api *API) getTemplate(project string, action string, id string, obj interface{}, c *gin.Context) { - template, err := api.Templates.GetTemplate(project, action, id) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - err = presidio.ConvertJSONToInterface(template, obj) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - } -} - -func (api *API) getAnalyzeTemplate(analyzeTemplateID string, analyzeTemplate *types.AnalyzeTemplate, - project string, c *gin.Context) *types.AnalyzeTemplate { - - if analyzeTemplate == nil && analyzeTemplateID != "" { - analyzeTemplate = &types.AnalyzeTemplate{} - api.getTemplate(project, analyze, analyzeTemplateID, analyzeTemplate, c) - } else if analyzeTemplate == nil { - server.AbortWithError(c, http.StatusBadRequest, fmt.Errorf("AnalyzeTemplate or AnalyzeTemplateId must be supplied")) - return nil - } - - return analyzeTemplate -} - -func (api *API) getAnonymizeTemplate(anonymizeTemplateID string, anonymizeTemplate *types.AnonymizeTemplate, - project string, c *gin.Context) *types.AnonymizeTemplate { - - if anonymizeTemplate == nil && anonymizeTemplateID != "" { - anonymizeTemplate = &types.AnonymizeTemplate{} - api.getTemplate(project, anonymize, anonymizeTemplateID, anonymizeTemplate, c) - } else if anonymizeTemplate 
== nil { - server.AbortWithError(c, http.StatusBadRequest, fmt.Errorf("AnalyzeTemplate or AnalyzeTemplateId must be supplied")) - return nil - } - - return anonymizeTemplate -} diff --git a/presidio-api/cmd/presidio-api/api.go b/presidio-api/cmd/presidio-api/api.go deleted file mode 100644 index 9a7ba39e2..000000000 --- a/presidio-api/cmd/presidio-api/api.go +++ /dev/null @@ -1,35 +0,0 @@ -package main - -import ( - "github.com/Microsoft/presidio/pkg/cache" - "github.com/Microsoft/presidio/pkg/platform" - "github.com/Microsoft/presidio/pkg/presidio" - "github.com/Microsoft/presidio/pkg/presidio/services" - "github.com/Microsoft/presidio/pkg/presidio/templates" -) - -const ( - analyze = "analyze" - anonymize = "anonymize" - scan = "scan" - stream = "stream" - datasink = "datasink" - scheduleScannerCronJob = "schedule-scanner-cronjob" - scheduleStreamsJob = "schedule-streams-job" -) - -//API kv store -type API struct { - Templates presidio.TemplatesStore - Services presidio.ServicesAPI -} - -//New KV store -func New(store platform.Store, cacheStore cache.Cache, settings *platform.Settings) *API { - template := templates.New(store, cacheStore) - svc := services.New(settings) - return &API{ - Templates: template, - Services: svc, - } -} diff --git a/presidio-api/cmd/presidio-api/api/analyze/analyze.go b/presidio-api/cmd/presidio-api/api/analyze/analyze.go new file mode 100644 index 000000000..ed69a6e8b --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/analyze/analyze.go @@ -0,0 +1,35 @@ +package analyze + +import ( + "context" + "fmt" + + types "github.com/Microsoft/presidio-genproto/golang" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates" +) + +//Analyze text +func Analyze(ctx context.Context, api *store.API, analyzeAPIRequest *types.AnalyzeApiRequest, project string) ([]*types.AnalyzeResult, error) { + + if analyzeAPIRequest.AnalyzeTemplateId == "" && 
analyzeAPIRequest.AnalyzeTemplate == nil { + return nil, fmt.Errorf("Analyze template is missing or empty") + } else if analyzeAPIRequest.AnalyzeTemplate == nil { + analyzeAPIRequest.AnalyzeTemplate = &types.AnalyzeTemplate{} + } + + err := templates.GetTemplate(api, project, store.Analyze, analyzeAPIRequest.AnalyzeTemplateId, analyzeAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + + res, err := api.Services.AnalyzeItem(ctx, analyzeAPIRequest.Text, analyzeAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + if res == nil { + return nil, fmt.Errorf("No results") + } + return res, err + +} diff --git a/presidio-api/cmd/presidio-api/api/analyze/analyze_test.go b/presidio-api/cmd/presidio-api/api/analyze/analyze_test.go new file mode 100644 index 000000000..65d686081 --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/analyze/analyze_test.go @@ -0,0 +1,76 @@ +package analyze + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + types "github.com/Microsoft/presidio-genproto/golang" + "github.com/Microsoft/presidio/pkg/presidio/services" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/mocks" +) + +func setupMockServices() *store.API { + srv := &services.Services{ + AnalyzerService: mocks.GetAnalyzeServiceMock(mocks.GetAnalyzerMockResult()), + } + + api := &store.API{ + Services: srv, + Templates: mocks.GetTemplateMock(), + } + return api +} + +func TestAnalyzeWithTemplateId(t *testing.T) { + + api := setupMockServices() + + project := "tests" + analyzeAPIRequest := &types.AnalyzeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + AnalyzeTemplateId: "test", + AnalyzeTemplate: &types.AnalyzeTemplate{}, + } + results, err := Analyze(context.Background(), api, analyzeAPIRequest, project) + assert.NoError(t, err) + assert.Equal(t, 2, len(results)) +} + +func TestAnalyzeWithTemplateStruct(t 
*testing.T) { + + api := setupMockServices() + + project := "tests" + analyzeAPIRequest := &types.AnalyzeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + AnalyzeTemplate: &types.AnalyzeTemplate{ + Fields: []*types.FieldTypes{ + { + Name: types.FieldTypesEnum_PHONE_NUMBER.String(), + }, + { + Name: types.FieldTypesEnum_EMAIL_ADDRESS.String(), + }, + }, + }, + } + results, err := Analyze(context.Background(), api, analyzeAPIRequest, project) + assert.NoError(t, err) + assert.Equal(t, 2, len(results)) +} + +func TestAnalyzeWithNoTemplate(t *testing.T) { + + api := setupMockServices() + + project := "tests" + analyzeAPIRequest := &types.AnalyzeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + } + _, err := Analyze(context.Background(), api, analyzeAPIRequest, project) + assert.Error(t, err) + +} diff --git a/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image.go b/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image.go new file mode 100644 index 000000000..5227f8ae9 --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image.go @@ -0,0 +1,131 @@ +package anonymizeimage + +import ( + "context" + "fmt" + + types "github.com/Microsoft/presidio-genproto/golang" + "github.com/Microsoft/presidio/pkg/presidio" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates" +) + +const maxImageSize = 4194304 +const ( + jpeg = "image/jpeg" + jpg = "image/jpg" + tiff = "image/tiff" + tif = "image/tif" + png = "image/png" + bmp = "image/bmp" +) + +//AnonymizeImage anonymize image +func AnonymizeImage(ctx context.Context, api *store.API, anonymizeImageAPIRequest *types.AnonymizeImageApiRequest, project string) ([]byte, error) { + + err := validateFormat(anonymizeImageAPIRequest.Data, anonymizeImageAPIRequest.ImageType) + if err != nil { + return nil, err + } + + err = 
validateAnonymizeImageTemplate(anonymizeImageAPIRequest) + if err != nil { + return nil, err + } + + err = templates.GetTemplate(api, project, store.AnonymizeImage, anonymizeImageAPIRequest.AnonymizeImageTemplateId, &anonymizeImageAPIRequest.AnonymizeImageTemplate) + if err != nil { + return nil, err + } + + image := &types.Image{ + Data: anonymizeImageAPIRequest.Data, + } + + if anonymizeImageAPIRequest.DetectionType == types.DetectionTypeEnum_OCR { + + err = validateAnalyzeTemplate(anonymizeImageAPIRequest) + if err != nil { + return nil, err + } + + err = templates.GetTemplate(api, project, store.Analyze, anonymizeImageAPIRequest.AnalyzeTemplateId, &anonymizeImageAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + + analyzeResults, err := applyPresidioOCR(ctx, api.Services, image, anonymizeImageAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + image.ImageType = anonymizeImageAPIRequest.ImageType + anonymizeResult, err := api.Services.AnonymizeImageItem(ctx, image, analyzeResults, anonymizeImageAPIRequest.DetectionType, anonymizeImageAPIRequest.AnonymizeImageTemplate) + if err != nil { + return nil, err + } + return anonymizeResult.Image.Data, nil + } + return nil, fmt.Errorf("Not method found") +} + +func validateFormat(data []byte, imageType string) error { + + if data == nil { + return fmt.Errorf("Image is empty or null") + } + if len(data) >= maxImageSize { + return fmt.Errorf("File size is over 4MB") + } + if imageType == "" || (imageType != jpg && + imageType != jpeg && + imageType != tif && + imageType != tiff && + imageType != png && + imageType != bmp) { + return fmt.Errorf("Image type is missing or wrong (image/jpg, image/jpeg, image/png, image/tiff, image/gif, image/bmp)") + } + return nil +} + +func validateAnalyzeTemplate(anonymizeImageAPIRequest *types.AnonymizeImageApiRequest) error { + if anonymizeImageAPIRequest.AnalyzeTemplateId == "" && anonymizeImageAPIRequest.AnalyzeTemplate == nil { + return 
fmt.Errorf("Analyze template is missing or empty") + } else if anonymizeImageAPIRequest.AnalyzeTemplate == nil { + anonymizeImageAPIRequest.AnalyzeTemplate = &types.AnalyzeTemplate{} + } + return nil +} + +func validateAnonymizeImageTemplate(anonymizeImageAPIRequest *types.AnonymizeImageApiRequest) error { + if anonymizeImageAPIRequest.AnonymizeImageTemplateId == "" && anonymizeImageAPIRequest.AnonymizeImageTemplate == nil { + return fmt.Errorf("Anonymize image template is missing or empty") + } else if anonymizeImageAPIRequest.AnonymizeImageTemplate == nil { + anonymizeImageAPIRequest.AnonymizeImageTemplate = &types.AnonymizeImageTemplate{} + } + return nil +} + +func applyPresidioOCR(ctx context.Context, services presidio.ServicesAPI, image *types.Image, analyzeTemplate *types.AnalyzeTemplate) ([]*types.AnalyzeResult, error) { + ocrRes, err := services.OcrItem(ctx, image) + if err != nil { + return nil, err + } + if ocrRes.Image.Text == "" { + return nil, fmt.Errorf("No content found in image") + + } + + image.Text = ocrRes.Image.Text + image.Boundingboxes = ocrRes.Image.Boundingboxes + + analyzeResults, err := services.AnalyzeItem(ctx, ocrRes.Image.Text, analyzeTemplate) + if err != nil { + return nil, err + + } + if analyzeResults == nil { + return nil, fmt.Errorf("No PII content found in image") + + } + return analyzeResults, nil +} diff --git a/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image_test.go b/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image_test.go new file mode 100644 index 000000000..e5a94d9d1 --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/anonymize-image/anonymize-image_test.go @@ -0,0 +1,92 @@ +package anonymizeimage + +import ( + "context" + + "github.com/stretchr/testify/assert" + + types "github.com/Microsoft/presidio-genproto/golang" + "github.com/Microsoft/presidio/pkg/presidio/services" + + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + 
"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/mocks" + + "testing" +) + +func setupMockServices() *store.API { + srv := &services.Services{ + AnalyzerService: mocks.GetAnalyzeServiceMock(mocks.GetAnalyzerMockResult()), + AnonymizeImageService: mocks.GetAnonymizerImageServiceMock(mocks.GetAnonymizerImageMockResult()), + OcrService: mocks.GetOcrServiceMock(mocks.GetOcrMockResult()), + } + + api := &store.API{ + Services: srv, + Templates: mocks.GetTemplateMock(), + } + return api +} + +func TestAnonymizeImageWithTemplateId(t *testing.T) { + + api := setupMockServices() + + project := "tests" + anonymizeImageAPIRequest := &types.AnonymizeImageApiRequest{ + AnalyzeTemplateId: "test", + AnalyzeTemplate: &types.AnalyzeTemplate{}, + AnonymizeImageTemplateId: "test", + ImageType: "image/jpg", + Data: make([]byte, 1), + } + + results, err := AnonymizeImage(context.Background(), api, anonymizeImageAPIRequest, project) + + assert.NoError(t, err) + assert.True(t, len(results) > 0) +} + +func TestAnonymizeImageWithTemplateStruct(t *testing.T) { + + api := setupMockServices() + + project := "tests" + anonymizeImageAPIRequest := &types.AnonymizeImageApiRequest{ + AnalyzeTemplateId: "test", + ImageType: "image/jpg", + AnalyzeTemplate: &types.AnalyzeTemplate{}, + AnonymizeImageTemplate: &types.AnonymizeImageTemplate{ + FieldTypeGraphics: []*types.FieldTypeGraphic{{ + Graphic: &types.Graphic{ + FillColorValue: &types.FillColorValue{ + Blue: 50, + Red: 50, + Green: 50, + }, + }, + }}, + }, + Data: make([]byte, 1), + } + + results, err := AnonymizeImage(context.Background(), api, anonymizeImageAPIRequest, project) + + assert.NoError(t, err) + assert.True(t, len(results) > 0) +} + +func TestAnonymizeImageWithNoTemplate(t *testing.T) { + + api := setupMockServices() + + project := "tests" + anonymizeImageAPIRequest := &types.AnonymizeImageApiRequest{ + AnalyzeTemplateId: "test", + Data: make([]byte, 1), + } + + _, err := AnonymizeImage(context.Background(), api, 
anonymizeImageAPIRequest, project) + + assert.Error(t, err) +} diff --git a/presidio-api/cmd/presidio-api/api/anonymize/anonymize.go b/presidio-api/cmd/presidio-api/api/anonymize/anonymize.go new file mode 100644 index 000000000..49af70d46 --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/anonymize/anonymize.go @@ -0,0 +1,60 @@ +package anonymize + +import ( + "context" + "fmt" + + types "github.com/Microsoft/presidio-genproto/golang" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates" +) + +//Anonymize text +func Anonymize(ctx context.Context, api *store.API, anonymizeAPIRequest *types.AnonymizeApiRequest, project string) (*types.AnonymizeResponse, error) { + + err := validateTemplate(anonymizeAPIRequest) + if err != nil { + return nil, err + } + + err = templates.GetTemplate(api, project, store.Analyze, anonymizeAPIRequest.AnalyzeTemplateId, anonymizeAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + + err = templates.GetTemplate(api, project, store.Anonymize, anonymizeAPIRequest.AnonymizeTemplateId, anonymizeAPIRequest.AnonymizeTemplate) + if err != nil { + return nil, err + } + + analyzeRes, err := api.Services.AnalyzeItem(ctx, anonymizeAPIRequest.Text, anonymizeAPIRequest.AnalyzeTemplate) + if err != nil { + return nil, err + } + if analyzeRes == nil { + return nil, fmt.Errorf("No analyze results") + } + + anonymizeRes, err := api.Services.AnonymizeItem(ctx, analyzeRes, anonymizeAPIRequest.Text, anonymizeAPIRequest.AnonymizeTemplate) + if err != nil { + return nil, err + } else if anonymizeRes == nil { + return nil, fmt.Errorf("No anonymize results") + } + return anonymizeRes, nil +} + +func validateTemplate(anonymizeAPIRequest *types.AnonymizeApiRequest) error { + if anonymizeAPIRequest.AnalyzeTemplateId == "" && anonymizeAPIRequest.AnalyzeTemplate == nil { + return fmt.Errorf("Analyze template is missing or empty") + } else if 
anonymizeAPIRequest.AnalyzeTemplate == nil { + anonymizeAPIRequest.AnalyzeTemplate = &types.AnalyzeTemplate{} + } + + if anonymizeAPIRequest.AnonymizeTemplateId == "" && anonymizeAPIRequest.AnonymizeTemplate == nil { + return fmt.Errorf("Anonymize template is missing or empty") + } else if anonymizeAPIRequest.AnonymizeTemplate == nil { + anonymizeAPIRequest.AnonymizeTemplate = &types.AnonymizeTemplate{} + } + return nil +} diff --git a/presidio-api/cmd/presidio-api/api/anonymize/anonymize_test.go b/presidio-api/cmd/presidio-api/api/anonymize/anonymize_test.go new file mode 100644 index 000000000..14ff0d7eb --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/anonymize/anonymize_test.go @@ -0,0 +1,99 @@ +package anonymize + +import ( + "context" + + "github.com/stretchr/testify/assert" + + types "github.com/Microsoft/presidio-genproto/golang" + "github.com/Microsoft/presidio/pkg/presidio/services" + + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/mocks" + + "testing" +) + +func setupMockServices() *store.API { + srv := &services.Services{ + AnalyzerService: mocks.GetAnalyzeServiceMock(mocks.GetAnalyzerMockResult()), + AnonymizeService: mocks.GetAnonymizerServiceMock(mocks.GetAnonymizerMockResult()), + } + + api := &store.API{ + Services: srv, + Templates: mocks.GetTemplateMock(), + } + return api +} + +func TestAnonymizeWithTemplateId(t *testing.T) { + + api := setupMockServices() + + project := "tests" + anonymizeAPIRequest := &types.AnonymizeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + AnalyzeTemplateId: "test", + AnalyzeTemplate: &types.AnalyzeTemplate{}, + AnonymizeTemplateId: "test", + } + + results, err := Anonymize(context.Background(), api, anonymizeAPIRequest, project) + + assert.NoError(t, err) + assert.Equal(t, "My number is and email ", results.Text) +} + +func TestAnonymizeWithTemplateStruct(t *testing.T) { + + api := 
setupMockServices() + + project := "tests" + anonymizeAPIRequest := &types.AnonymizeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + AnalyzeTemplateId: "test", + AnalyzeTemplate: &types.AnalyzeTemplate{}, + AnonymizeTemplate: &types.AnonymizeTemplate{ + FieldTypeTransformations: []*types.FieldTypeTransformation{{ + Fields: []*types.FieldTypes{{ + Name: types.FieldTypesEnum_PHONE_NUMBER.String(), + }}, + Transformation: &types.Transformation{ + ReplaceValue: &types.ReplaceValue{ + NewValue: "", + }, + }, + }, { + Fields: []*types.FieldTypes{{ + Name: types.FieldTypesEnum_EMAIL_ADDRESS.String(), + }}, + Transformation: &types.Transformation{ + ReplaceValue: &types.ReplaceValue{ + NewValue: "", + }, + }, + }}, + }, + } + + results, err := Anonymize(context.Background(), api, anonymizeAPIRequest, project) + + assert.NoError(t, err) + assert.Equal(t, "My number is and email ", results.Text) +} + +func TestAnonymizeWithNoTemplate(t *testing.T) { + + api := setupMockServices() + + project := "tests" + anonymizeAPIRequest := &types.AnonymizeApiRequest{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + AnalyzeTemplateId: "test", + } + + _, err := Anonymize(context.Background(), api, anonymizeAPIRequest, project) + + assert.Error(t, err) +} diff --git a/presidio-api/cmd/presidio-api/api/api.go b/presidio-api/cmd/presidio-api/api/api.go new file mode 100644 index 000000000..c2abbddcb --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/api.go @@ -0,0 +1,53 @@ +package api + +import ( + "github.com/Microsoft/presidio/pkg/cache" + "github.com/Microsoft/presidio/pkg/platform" + "github.com/Microsoft/presidio/pkg/presidio" + "github.com/Microsoft/presidio/pkg/presidio/services" + "github.com/Microsoft/presidio/pkg/presidio/templates" +) + +const ( + //Analyze service + Analyze = "analyze" + //Anonymize service + Anonymize = "anonymize" + //AnonymizeImage service + AnonymizeImage = "anonymize-image" + //Scan service + Scan = "scan" 
+ //Stream service + Stream = "stream" + //Datasink service + Datasink = "datasink" + //ScheduleScannerCronJob service + ScheduleScannerCronJob = "schedule-scanner-cronjob" + //ScheduleStreamsJob service + ScheduleStreamsJob = "schedule-streams-job" +) + +//API kv store +type API struct { + Templates presidio.TemplatesStore + Services presidio.ServicesAPI +} + +//New KV store +func New(store platform.Store, cacheStore cache.Cache, settings *platform.Settings) *API { + template := templates.New(store, cacheStore) + svc := services.New(settings) + return &API{ + Templates: template, + Services: svc, + } +} + +//SetupGRPCServices to presidio services +func (api *API) SetupGRPCServices() { + api.Services.SetupAnalyzerService() + api.Services.SetupAnonymizerService() + api.Services.SetupAnonymizerImageService() + api.Services.SetupSchedulerService() + api.Services.SetupOCRService() +} diff --git a/presidio-api/cmd/presidio-api/api/mocks/mocks.go b/presidio-api/cmd/presidio-api/api/mocks/mocks.go new file mode 100644 index 000000000..1b1b9b37d --- /dev/null +++ b/presidio-api/cmd/presidio-api/api/mocks/mocks.go @@ -0,0 +1,195 @@ +package mocks + +import ( + "context" + + types "github.com/Microsoft/presidio-genproto/golang" + "github.com/Microsoft/presidio/pkg/presidio" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" + + "github.com/stretchr/testify/mock" + "google.golang.org/grpc" +) + +//AnalyzerMockedObject analyzer mock +type AnalyzerMockedObject struct { + mock.Mock +} + +//AnonymizerMockedObject anonymizer mock +type AnonymizerMockedObject struct { + mock.Mock +} + +//AnonymizerImageMockedObject anonymizer mock +type AnonymizerImageMockedObject struct { + mock.Mock +} + +//OcrMockedObject anonymizer mock +type OcrMockedObject struct { + mock.Mock +} + +//TemplateMockedObject template mock +type TemplateMockedObject struct { + mock.Mock +} + +//GetAnalyzerMockResult get analyzer mock response +func GetAnalyzerMockResult() 
*types.AnalyzeResponse { + location := &types.Location{ + Start: 153, End: 163, Length: 10, + } + results := [](*types.AnalyzeResult){ + &types.AnalyzeResult{ + Field: &types.FieldTypes{Name: types.FieldTypesEnum_PHONE_NUMBER.String()}, + Text: "(555) 253-0000", + Score: 1.0, + Location: location, + }, + &types.AnalyzeResult{ + Field: &types.FieldTypes{Name: types.FieldTypesEnum_EMAIL_ADDRESS.String()}, + Text: "johnsnow@foo.com", + Score: 1.0, + Location: location, + }, + } + return &types.AnalyzeResponse{ + AnalyzeResults: results, + } +} + +//GetAnalyzeServiceMock get service mock +func GetAnalyzeServiceMock(expectedResult *types.AnalyzeResponse) types.AnalyzeServiceClient { + analyzeService := &AnalyzerMockedObject{} + analyzeService.On("Apply", mock.Anything, mock.Anything, mock.Anything).Return(expectedResult, nil) + return analyzeService +} + +//Apply analyzer mock +func (m *AnalyzerMockedObject) Apply(c context.Context, analyzeRequest *types.AnalyzeRequest, opts ...grpc.CallOption) (*types.AnalyzeResponse, error) { + args := m.Mock.Called() + var result *types.AnalyzeResponse + if args.Get(0) != nil { + result = args.Get(0).(*types.AnalyzeResponse) + } + return result, args.Error(1) +} + +//GetAnonymizerMockResult get anonymizer mock response +func GetAnonymizerMockResult() *types.AnonymizeResponse { + + return &types.AnonymizeResponse{ + Text: "My number is and email ", + } +} + +//GetAnonymizerServiceMock get service mock +func GetAnonymizerServiceMock(expectedResult *types.AnonymizeResponse) types.AnonymizeServiceClient { + anonymizerService := &AnonymizerMockedObject{} + anonymizerService.On("Apply", mock.Anything, mock.Anything, mock.Anything).Return(expectedResult, nil) + return anonymizerService +} + +//Apply anonymizer mock +func (m *AnonymizerMockedObject) Apply(c context.Context, anonymizeRequest *types.AnonymizeRequest, opts ...grpc.CallOption) (*types.AnonymizeResponse, error) { + args := m.Mock.Called() + var result *types.AnonymizeResponse + 
if args.Get(0) != nil { + result = args.Get(0).(*types.AnonymizeResponse) + } + return result, args.Error(1) +} + +//GetAnonymizerImageServiceMock get service mock +func GetAnonymizerImageServiceMock(expectedResult *types.AnonymizeImageResponse) types.AnonymizeImageServiceClient { + anonymizerImageService := &AnonymizerImageMockedObject{} + anonymizerImageService.On("Apply", mock.Anything, mock.Anything, mock.Anything).Return(expectedResult, nil) + return anonymizerImageService +} + +//Apply anonymizer mock +func (m *AnonymizerImageMockedObject) Apply(c context.Context, anonymizeImageRequest *types.AnonymizeImageRequest, opts ...grpc.CallOption) (*types.AnonymizeImageResponse, error) { + args := m.Mock.Called() + var result *types.AnonymizeImageResponse + if args.Get(0) != nil { + result = args.Get(0).(*types.AnonymizeImageResponse) + } + return result, args.Error(1) +} + +//GetAnonymizerImageMockResult get anonymizer image mock response +func GetAnonymizerImageMockResult() *types.AnonymizeImageResponse { + + return &types.AnonymizeImageResponse{ + Image: &types.Image{ + Data: make([]byte, 1), + Boundingboxes: make([]*types.Boundingbox, 1), + }, + } +} + +//GetOcrServiceMock get service mock +func GetOcrServiceMock(expectedResult *types.OcrResponse) types.OcrServiceClient { + ocrService := &OcrMockedObject{} + ocrService.On("Apply", mock.Anything, mock.Anything, mock.Anything).Return(expectedResult, nil) + return ocrService +} + +//Apply ocr mock +func (m *OcrMockedObject) Apply(c context.Context, ocrRequest *types.OcrRequest, opts ...grpc.CallOption) (*types.OcrResponse, error) { + args := m.Mock.Called() + var result *types.OcrResponse + if args.Get(0) != nil { + result = args.Get(0).(*types.OcrResponse) + } + return result, args.Error(1) +} + +//GetOcrMockResult get ocr mock response +func GetOcrMockResult() *types.OcrResponse { + + return &types.OcrResponse{ + Image: &types.Image{ + Text: "My number is (555) 253-0000 and email johnsnow@foo.com", + }, + } +} + 
+//GetTemplateMock returns a TemplatesStore mock whose GetTemplate answers the
+// analyze, anonymize and anonymize-image actions with fixed JSON templates,
+// for any project and any template id.
+func GetTemplateMock() presidio.TemplatesStore {
+	templateService := &TemplateMockedObject{}
+	templateService.On("GetTemplate", mock.Anything, store.Analyze, mock.Anything).
+		Return(`{"fields":[{"name":"PHONE_NUMBER"}, {"name":"EMAIL_ADDRESS"}]}`, nil).
+		On("GetTemplate", mock.Anything, store.Anonymize, mock.Anything).
+		Return(`{"fieldTypeTransformations":[{"fields":[],"transformation":{"replaceValue":{"newValue":""}}}]}`, nil).
+		On("GetTemplate", mock.Anything, store.AnonymizeImage, mock.Anything).
+		Return(`{"fieldTypeGraphics":[{"graphic":{"fillColorValue":{"blue":0,"red":0,"green":0}}}]}`, nil)
+	return templateService
+}
+
+//GetTemplate mock: returns the string (index 0) and error (index 1)
+// configured for the recorded call.
+func (m *TemplateMockedObject) GetTemplate(project string, action string, id string) (string, error) {
+	args := m.Mock.Called(project, action, id)
+	result := args.String(0)
+	return result, args.Error(1)
+}
+
+//InsertTemplate mock: returns the error configured for the call.
+func (m *TemplateMockedObject) InsertTemplate(project, action, id, value string) error {
+	args := m.Mock.Called()
+	// The method has a single return value, so the error is at index 0;
+	// index 1 does not exist for a one-value Return(...).
+	return args.Error(0)
+}
+
+//UpdateTemplate mock: returns the error configured for the call.
+func (m *TemplateMockedObject) UpdateTemplate(project, action, id, value string) error {
+	args := m.Mock.Called()
+	// Single return value: the error is at index 0.
+	return args.Error(0)
+}
+
+//DeleteTemplate mock: returns the error configured for the call.
+func (m *TemplateMockedObject) DeleteTemplate(project string, action string, id string) error {
+	args := m.Mock.Called()
+	// Single return value: the error is at index 0.
+	return args.Error(0)
+}
diff --git a/presidio-api/cmd/presidio-api/api/scanner-cron-job/scanner-cron-job.go b/presidio-api/cmd/presidio-api/api/scanner-cron-job/scanner-cron-job.go
new file mode 100644
index 000000000..d1987ab9f
--- /dev/null
+++ b/presidio-api/cmd/presidio-api/api/scanner-cron-job/scanner-cron-job.go
@@ -0,0 +1,82 @@
+package scannercronjob
+
+import (
+	"context"
+	"fmt"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/pkg/presidio"
+	store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api"
+	"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates"
+)
+
+//ScheduleScannerCronJob builds a scanner cron job request from the API request
+// (either a stored template id or an inline request) and submits it through
+// api.Services. Any error from building or submitting is returned as-is.
+func ScheduleScannerCronJob(ctx context.Context, api *store.API, cronAPIJobRequest *types.ScannerCronJobApiRequest, project string) (*types.ScannerCronJobResponse, error) {
+
+	scannerCronJobRequest, err := getScannerCronJobRequest(api, cronAPIJobRequest, project)
+	if err != nil {
+		return nil, err
+	}
+	schedulerResponse, err := invokeScannerCronJobScheduler(ctx, api.Services, scannerCronJobRequest)
+	if err != nil {
+		return nil, err
+	}
+
+	return schedulerResponse, nil
+
+}
+
+// invokeScannerCronJobScheduler forwards the request to services.ApplyScan.
+func invokeScannerCronJobScheduler(ctx context.Context, services presidio.ServicesAPI, scannerCronJobRequest *types.ScannerCronJobRequest) (*types.ScannerCronJobResponse, error) {
+
+	res, err := services.ApplyScan(ctx, scannerCronJobRequest)
+	if err != nil {
+		return nil, err
+	}
+	return res, nil
+}
+
+// getScannerCronJobRequest resolves the API request into a scheduler request.
+// When ScannerCronJobTemplateId is set, the cron job template and the scan,
+// datasink, analyze and (optional) anonymize templates it references are
+// loaded from the template store; a failed lookup aborts with that error
+// instead of scheduling a job built from empty templates. Otherwise the inline
+// ScannerCronJobRequest is used. It is an error to supply neither.
+func getScannerCronJobRequest(api *store.API, cronJobAPIRequest *types.ScannerCronJobApiRequest, project string) (*types.ScannerCronJobRequest, error) {
+	var scanRequest *types.ScanRequest
+	var trigger *types.Trigger
+	var name string
+
+	if cronJobAPIRequest.ScannerCronJobTemplateId != "" {
+		cronJobTemplate := &types.ScannerCronJobTemplate{}
+		err := templates.GetTemplate(api, project, store.ScheduleScannerCronJob, cronJobAPIRequest.ScannerCronJobTemplateId, cronJobTemplate)
+		if err != nil {
+			return nil, err
+		}
+
+		scanID := cronJobTemplate.ScanTemplateId
+		scanTemplate := &types.ScanTemplate{}
+		err = templates.GetTemplate(api, project, store.Scan, scanID, scanTemplate)
+		if err != nil {
+			return nil, err
+		}
+
+		datasinkTemplate := &types.DatasinkTemplate{}
+		err = templates.GetTemplate(api, project, store.Datasink, cronJobTemplate.DatasinkTemplateId, datasinkTemplate)
+		if err != nil {
+			return nil, err
+		}
+
+		analyzeTemplate := &types.AnalyzeTemplate{}
+		err = templates.GetTemplate(api, project, store.Analyze, cronJobTemplate.AnalyzeTemplateId, analyzeTemplate)
+		if err != nil {
+			return nil, err
+		}
+
+		anonymizeTemplate := &types.AnonymizeTemplate{}
+		if cronJobTemplate.AnonymizeTemplateId != "" {
+			err = templates.GetTemplate(api, project, store.Anonymize, cronJobTemplate.AnonymizeTemplateId, anonymizeTemplate)
+			if err != nil {
+				return nil, err
+			}
+		}
+		trigger = cronJobTemplate.GetTrigger()
+		name = cronJobTemplate.GetName()
+		scanRequest = &types.ScanRequest{
+			AnalyzeTemplate:   analyzeTemplate,
+			AnonymizeTemplate: anonymizeTemplate,
+			DatasinkTemplate:  datasinkTemplate,
+			ScanTemplate:      scanTemplate,
+		}
+	} else if cronJobAPIRequest.ScannerCronJobRequest != nil {
+		scanRequest = cronJobAPIRequest.ScannerCronJobRequest.GetScanRequest()
+		trigger = cronJobAPIRequest.ScannerCronJobRequest.GetTrigger()
+		name = cronJobAPIRequest.ScannerCronJobRequest.GetName()
+	} else {
+		return nil, fmt.Errorf("ScannerCronJobTemplateId or ScannerCronJobRequest must be supplied")
+	}
+
+	return &types.ScannerCronJobRequest{
+		Trigger:     trigger,
+		ScanRequest: scanRequest,
+		Name:        name,
+	}, nil
+}
diff --git a/presidio-api/cmd/presidio-api/api/stream-job/stream-job.go b/presidio-api/cmd/presidio-api/api/stream-job/stream-job.go
new file mode 100644
index 000000000..9d417e505
--- /dev/null
+++ b/presidio-api/cmd/presidio-api/api/stream-job/stream-job.go
@@ -0,0 +1,89 @@
+package streamjob
+
+import (
+	"context"
+	"fmt"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/pkg/presidio"
+	store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api"
+	"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates"
+)
+
+//ScheduleStreamsJob builds a streams job request from the API request (stored
+// template id or inline request) and submits it through api.Services.
+func ScheduleStreamsJob(ctx context.Context, api *store.API, streamsJobAPIRequest *types.StreamsJobApiRequest, project string) (*types.StreamsJobResponse, error) {
+
+	streamsJobRequest, err := getStreamsJobRequest(api, streamsJobAPIRequest, project)
+	if err != nil {
+		return nil, err
+	}
+	schedulerResponse, err := invokeStreamsJobScheduler(ctx, api.Services, streamsJobRequest)
+	if err != nil {
+		return nil, err
+	}
+
+	return schedulerResponse, nil
+}
+
+func invokeStreamsJobScheduler(ctx
context.Context, services presidio.ServicesAPI, streamsJobRequest *types.StreamsJobRequest) (*types.StreamsJobResponse, error) {
+	res, err := services.ApplyStream(ctx, streamsJobRequest)
+	if err != nil {
+		return nil, err
+	}
+	return res, nil
+}
+
+// getStreamsJobRequest resolves the API request into a scheduler request.
+// When StreamsJobTemplateId is set, the job template and the stream, datasink,
+// analyze and (optional) anonymize templates it references are loaded from the
+// template store, and the first failed lookup is returned. Otherwise the
+// inline StreamsJobRequest is used. It is an error to supply neither.
+func getStreamsJobRequest(api *store.API, jobAPIRequest *types.StreamsJobApiRequest, project string) (*types.StreamsJobRequest, error) {
+	streamsJobRequest := &types.StreamsJobRequest{}
+
+	if jobAPIRequest.GetStreamsJobTemplateId() != "" {
+		jobTemplate := &types.StreamsJobTemplate{}
+		err := templates.GetTemplate(api, project, store.ScheduleStreamsJob, jobAPIRequest.StreamsJobTemplateId, jobTemplate)
+		if err != nil {
+			return nil, err
+		}
+		streamID := jobTemplate.GetStreamsTemplateId()
+		streamTemplate := &types.StreamTemplate{}
+		err = templates.GetTemplate(api, project, store.Stream, streamID, streamTemplate)
+		if err != nil {
+			return nil, err
+		}
+
+		datasinkTemplate := &types.DatasinkTemplate{}
+		err = templates.GetTemplate(api, project, store.Datasink, jobTemplate.GetDatasinkTemplateId(), datasinkTemplate)
+		if err != nil {
+			return nil, err
+		}
+		analyzeTemplate := &types.AnalyzeTemplate{}
+		err = templates.GetTemplate(api, project, store.Analyze, jobTemplate.GetAnalyzeTemplateId(), analyzeTemplate)
+		if err != nil {
+			return nil, err
+		}
+		anonymizeTemplate := &types.AnonymizeTemplate{}
+		if jobTemplate.AnonymizeTemplateId != "" {
+			err = templates.GetTemplate(api, project, store.Anonymize, jobTemplate.GetAnonymizeTemplateId(), anonymizeTemplate)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		streamsJobRequest = &types.StreamsJobRequest{
+			Name: streamTemplate.GetName(),
+			StreamsRequest: &types.StreamRequest{
+				AnalyzeTemplate:   analyzeTemplate,
+				AnonymizeTemplate: anonymizeTemplate,
+				DatasinkTemplate:  datasinkTemplate,
+				StreamConfig:      streamTemplate.GetStreamConfig(),
+			},
+		}
+	} else if jobAPIRequest.GetStreamsJobRequest() != nil {
+		streamsJobRequest = jobAPIRequest.GetStreamsJobRequest()
+	} else {
+		return nil, fmt.Errorf("StreamsJobTemplateId or StreamsRequest must be supplied")
+	}
+
+	return streamsJobRequest, nil
+}
diff --git a/presidio-api/cmd/presidio-api/api/templates/templates.go b/presidio-api/cmd/presidio-api/api/templates/templates.go
new file mode 100644
index 000000000..c251316ec
--- /dev/null
+++ b/presidio-api/cmd/presidio-api/api/templates/templates.go
@@ -0,0 +1,73 @@
+package templates
+
+import (
+	"fmt"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/pkg/presidio"
+	store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api"
+)
+
+//GetFieldTypes returns one FieldTypes entry per key of
+// types.FieldTypesEnum_value. The keys come from ranging over a map, so the
+// order of the result is not stable between calls.
+func GetFieldTypes() []types.FieldTypes {
+	var fieldTypeArray []types.FieldTypes
+	for key := range types.FieldTypesEnum_value {
+		fieldTypeArray = append(fieldTypeArray, types.FieldTypes{Name: key})
+	}
+	return fieldTypeArray
+}
+
+//GetActionTemplate returns the stored template for project/action/id.
+func GetActionTemplate(api *store.API, project, action, id string) (string, error) {
+	return api.Templates.GetTemplate(project, action, id)
+}
+
+//PostActionTemplate inserts a template and returns a confirmation message.
+func PostActionTemplate(api *store.API, project, action, id, value string) (string, error) {
+	err := api.Templates.InsertTemplate(project, action, id, value)
+	if err != nil {
+		return "", err
+	}
+	return "Template added successfully ", nil
+}
+
+//PutActionTemplate updates a template and returns a confirmation message.
+func PutActionTemplate(api *store.API, project, action, id, value string) (string, error) {
+	err := api.Templates.UpdateTemplate(project, action, id, value)
+	if err != nil {
+		return "", err
+	}
+
+	return "Template updated successfully", nil
+}
+
+//DeleteActionTemplate deletes a template and returns a confirmation message.
+func DeleteActionTemplate(api *store.API, project, action, id string) (string, error) {
+	err := api.Templates.DeleteTemplate(project, action, id)
+	if err != nil {
+		return "", err
+	}
+	// err is known to be nil here; return the literal like the sibling helpers.
+	return "Template deleted successfully", nil
+}
+
+//GetTemplate loads the template stored under project/action/id and decodes its
+// JSON into obj. An empty id is a no-op that leaves obj untouched and returns
+// nil. Store and decoding errors are returned unchanged.
+func GetTemplate(api *store.API, project, action, id string, obj interface{}) error {
+
+	if id == "" {
+		return nil
+	}
+
+	template, err := api.Templates.GetTemplate(project, action, id)
+	if err != nil {
+		return err
+	}
+
+	err = presidio.ConvertJSONToInterface(template, obj)
+	if err != nil {
+		return err
+	}
+	// NOTE(review): obj is the caller's destination and is not reassigned by
+	// this function, so this only fires when the caller passed nil. The
+	// message suggests an empty stored template was meant - confirm intent.
+	if obj == nil {
+		return fmt.Errorf("%s template is empty", action)
+	}
+	return nil
+}
diff --git a/presidio-api/cmd/presidio-api/main.go b/presidio-api/cmd/presidio-api/main.go index 4afe6ed77..797b05e06 100644 --- a/presidio-api/cmd/presidio-api/main.go +++ b/presidio-api/cmd/presidio-api/main.go @@ -12,17 +12,23 @@ import ( "github.com/Microsoft/presidio/pkg/platform" "github.com/Microsoft/presidio/pkg/platform/kube" "github.com/Microsoft/presidio/pkg/platform/local" + "github.com/Microsoft/presidio/pkg/presidio/services" server "github.com/Microsoft/presidio/pkg/server" + store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api" ) +var api *store.API + func main() { pflag.Int(platform.WebPort, 8080, "HTTP listen port") pflag.String(platform.AnalyzerSvcAddress, "localhost:3000", "Analyzer service address") pflag.String(platform.AnonymizerSvcAddress, "localhost:3001", "Anonymizer service address") - pflag.String(platform.SchedulerSvcAddress, "", "Scheduler service address") - pflag.String(platform.RedisURL, "", "Redis address") + pflag.String(platform.AnonymizerImageSvcAddress, "", "Anonymizer image service address (optional)") + pflag.String(platform.OcrSvcAddress, "", "ocr service address (optional)") + pflag.String(platform.SchedulerSvcAddress, "", "Scheduler service address (optional)") + pflag.String(platform.RedisURL, "", "Redis address (optional)") pflag.String(platform.RedisPassword, "", "Redis db password (optional)") pflag.Int(platform.RedisDb, 0, "Redis db (optional)") pflag.Bool(platform.RedisSSL, false, "Redis ssl (optional)") @@ -38,8 +44,6 @@ func
main() { svc := services.New(settings) - var api *API - // Setup Redis cache var cacheStore cache.Cache @@ -50,25 +54,25 @@ func main() { // Kubernetes platform if settings.Namespace != "" { - store, err := kube.New(settings.Namespace, "") + platformStore, err := kube.New(settings.Namespace, "") if err != nil { log.Fatal(err.Error()) } - api = New(store, cacheStore, settings) + api = store.New(platformStore, cacheStore, settings) } else { // Local platform - store, err := local.New(os.TempDir()) + platformStore, err := local.New(os.TempDir()) if err != nil { log.Fatal(err.Error()) } - api = New(store, cacheStore, settings) + api = store.New(platformStore, cacheStore, settings) } - api.setupGRPCServices() - setupHTTPServer(api, settings.WebPort, settings.LogLevel) + api.SetupGRPCServices() + setupHTTPServer(settings.WebPort, settings.LogLevel) } -func setupHTTPServer(api *API, port int, loglevel string) { +func setupHTTPServer(port int, loglevel string) { r := server.Setup(port, loglevel) @@ -86,13 +90,13 @@ func setupHTTPServer(api *API, port int, loglevel string) { action := templates.Group("/:action") { // GET template - action.GET(":id", api.getActionTemplate) + action.GET(":id", getActionTemplate) // POST template - action.POST(":id", api.postActionTemplate) + action.POST(":id", postActionTemplate) // PUT, update template - action.PUT(":id", api.putActionTemplate) + action.PUT(":id", putActionTemplate) // DELETE template - action.DELETE(":id", api.deleteActionTemplate) + action.DELETE(":id", deleteActionTemplate) } } @@ -101,20 +105,23 @@ func setupHTTPServer(api *API, port int, loglevel string) { { // Analyze text // /api/v1/projects/123/analyze - projects.POST("/analyze", api.analyze) + projects.POST("/analyze", analyzeText) // Anonymize text // /api/v1/projects/123/anonymize - projects.POST("/anonymize", api.anonymize) + projects.POST("/anonymize", anonymizeText) + + // Anonymize image + // /api/v1/projects/123/anonymize-image + 
projects.POST("/anonymize-image", anonymizeImage) // Schedule scanning cron job // /api/v1/projects/123/schedule-scanner-cronjob - projects.POST("/schedule-scanner-cronjob", api.scheduleScannerCronJob) + projects.POST("/schedule-scanner-cronjob", scheduleScannerCronJob) // Schedule streams job // /api/v1/projects/123/schedule-streams-job - projects.POST("/schedule-streams-job", api.scheduleStreamsJob) - + projects.POST("/schedule-streams-job", scheduleStreamJob) } }
diff --git a/presidio-api/cmd/presidio-api/methods.go b/presidio-api/cmd/presidio-api/methods.go
new file mode 100644
index 000000000..eb878b1ce
--- /dev/null
+++ b/presidio-api/cmd/presidio-api/methods.go
@@ -0,0 +1,240 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"mime/multipart"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/pkg/presidio"
+	server "github.com/Microsoft/presidio/pkg/server"
+	store "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api"
+	"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/analyze"
+	"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/anonymize"
+	ai "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/anonymize-image"
+	scj "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/scanner-cron-job"
+	sj "github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/stream-job"
+	"github.com/Microsoft/presidio/presidio-api/cmd/presidio-api/api/templates"
+)
+
+// getFieldTypes writes the list of available field types.
+func getFieldTypes(c *gin.Context) {
+	t := templates.GetFieldTypes()
+	server.WriteResponse(c, http.StatusOK, t)
+
+}
+
+// getActionTemplate writes the template addressed by the :project, :action
+// and :id path parameters, or a 400 with the lookup error.
+func getActionTemplate(c *gin.Context) {
+	action := c.Param("action")
+	project := c.Param("project")
+	id := c.Param("id")
+	result, err := templates.GetActionTemplate(api, project, action, id)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+
+	}
+	server.WriteResponse(c, http.StatusOK, result)
+}
+
+// postActionTemplate validates the request body for the given action and
+// inserts it as a new template.
+func postActionTemplate(c *gin.Context) {
+	action := c.Param("action")
+	project := c.Param("project")
+	id := c.Param("id")
+	value, err := validateTemplate(action, c)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+	result, err := templates.PostActionTemplate(api, project, action, id, value)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+	server.WriteResponse(c, http.StatusOK, result)
+}
+
+// putActionTemplate validates the request body for the given action and
+// updates the stored template.
+func putActionTemplate(c *gin.Context) {
+	action := c.Param("action")
+	project := c.Param("project")
+	id := c.Param("id")
+	value, err := validateTemplate(action, c)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+	result, err := templates.PutActionTemplate(api, project, action, id, value)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+	server.WriteResponse(c, http.StatusOK, result)
+}
+
+// deleteActionTemplate deletes the template addressed by the path parameters.
+func deleteActionTemplate(c *gin.Context) {
+	action := c.Param("action")
+	project := c.Param("project")
+	id := c.Param("id")
+	result, err := templates.DeleteActionTemplate(api, project, action, id)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+
+	}
+	server.WriteResponse(c, http.StatusOK, result)
+}
+
+// analyzeText binds the request body and runs the analyze flow for :project.
+// Nothing is written here when binding fails.
+func analyzeText(c *gin.Context) {
+	var analyzeAPIRequest *types.AnalyzeApiRequest
+	project := c.Param("project")
+	if c.Bind(&analyzeAPIRequest) == nil {
+		result, err := analyze.Analyze(c, api, analyzeAPIRequest, project)
+		if err != nil {
+			server.AbortWithError(c, http.StatusBadRequest, err)
+			return
+		}
+		server.WriteResponse(c, http.StatusOK, result)
+	}
+}
+
+// anonymizeText binds the request body and runs the anonymize flow for
+// :project. Nothing is written here when binding fails.
+func anonymizeText(c *gin.Context) {
+	var anonymizeAPIRequest *types.AnonymizeApiRequest
+	project := c.Param("project")
+	if c.Bind(&anonymizeAPIRequest) == nil {
+		result, err := anonymize.Anonymize(c, api, anonymizeAPIRequest, project)
+		if err != nil {
+			server.AbortWithError(c, http.StatusBadRequest, err)
+			return
+		}
+		server.WriteResponse(c, http.StatusOK, result)
+	}
+}
+
+// anonymizeImage reads the multipart form, runs the image anonymization flow
+// and writes the resulting bytes with the request's imageType as content type.
+func anonymizeImage(c *gin.Context) {
+
+	project := c.Param("project")
+
+	anonymizeImageAPIRequest, err := bindAnonymizeImageParameters(c)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+
+	result, err := ai.AnonymizeImage(c, api, anonymizeImageAPIRequest, project)
+	if err != nil {
+		server.AbortWithError(c, http.StatusBadRequest, err)
+		return
+	}
+	c.Data(http.StatusOK, anonymizeImageAPIRequest.ImageType, result)
+}
+
+// bindAnonymizeImageParameters builds the API request from the multipart form:
+// the imageType, anonymizeImageTemplateId and analyzeTemplateId fields are
+// copied as-is, the optional anonymizeImageTemplate and analyzeTemplate fields
+// are decoded from JSON, and the "file" part is read into Data. A template
+// field that fails to decode, or a missing/unreadable file, is an error.
+func bindAnonymizeImageParameters(c *gin.Context) (*types.AnonymizeImageApiRequest, error) {
+	anonymizeImageAPIRequest := &types.AnonymizeImageApiRequest{}
+
+	anonymizeImageAPIRequest.ImageType, _ = c.GetPostForm("imageType")
+
+	anonymizeImageTemplate, _ := c.GetPostForm("anonymizeImageTemplate")
+	if anonymizeImageTemplate != "" {
+		anonymizeImageAPIRequest.AnonymizeImageTemplate = &types.AnonymizeImageTemplate{}
+		err := presidio.ConvertJSONToInterface(anonymizeImageTemplate, &anonymizeImageAPIRequest.AnonymizeImageTemplate)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	anonymizeImageAPIRequest.AnonymizeImageTemplateId, _ = c.GetPostForm("anonymizeImageTemplateId")
+
+	analyzeTemplate, _ := c.GetPostForm("analyzeTemplate")
+	if analyzeTemplate != "" {
+		anonymizeImageAPIRequest.AnalyzeTemplate = &types.AnalyzeTemplate{}
+		err := presidio.ConvertJSONToInterface(analyzeTemplate, &anonymizeImageAPIRequest.AnalyzeTemplate)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	anonymizeImageAPIRequest.AnalyzeTemplateId, _ = c.GetPostForm("analyzeTemplateId")
+
+	file, err := c.FormFile("file")
+	if err != nil {
+		return nil, err
+	}
+	anonymizeImageAPIRequest.Data, err = openFile(file)
+	if err != nil {
+		return nil, err
+	}
+	return anonymizeImageAPIRequest, nil
+}
+
+// scheduleScannerCronJob binds the request body and schedules a scanner cron
+// job for :project. Nothing is written here when binding fails.
+func scheduleScannerCronJob(c *gin.Context) {
+	var scannerCronJobAPIRequest *types.ScannerCronJobApiRequest
+	project := c.Param("project")
+	if c.Bind(&scannerCronJobAPIRequest) == nil {
+		result, err := scj.ScheduleScannerCronJob(c, api, scannerCronJobAPIRequest, project)
+		if err != nil {
+			server.AbortWithError(c, http.StatusBadRequest, err)
+			return
+		}
+		server.WriteResponse(c, http.StatusOK, result)
+	}
+}
+
+// scheduleStreamJob binds the request body and schedules a streams job for
+// :project. Nothing is written here when binding fails.
+func scheduleStreamJob(c *gin.Context) {
+	var streamsJobAPIRequest *types.StreamsJobApiRequest
+	project := c.Param("project")
+	if c.Bind(&streamsJobAPIRequest) == nil {
+		result, err := sj.ScheduleStreamsJob(c, api, streamsJobAPIRequest, project)
+		if err != nil {
+			server.AbortWithError(c, http.StatusBadRequest, err)
+			return
+		}
+		server.WriteResponse(c, http.StatusOK, result)
+	}
+}
+
+// validateTemplate picks the template type that matches action, binds the
+// request body and returns it re-encoded as JSON. Unknown actions are an error.
+func validateTemplate(action string, c *gin.Context) (string, error) {
+	switch action {
+	case store.Analyze:
+		var analyzerTemplate types.AnalyzeTemplate
+		return bindAndConvert(analyzerTemplate, c)
+	case store.Anonymize:
+		var anonymizeTemplate types.AnonymizeTemplate
+		return bindAndConvert(anonymizeTemplate, c)
+	case store.AnonymizeImage:
+		var anonymizeImageTemplate types.AnonymizeImageTemplate
+		return bindAndConvert(anonymizeImageTemplate, c)
+	case store.Scan:
+		var scanTemplate types.ScanTemplate
+		return bindAndConvert(scanTemplate, c)
+	case store.Datasink:
+		var datasinkTemplate types.DatasinkTemplate
+		return bindAndConvert(datasinkTemplate, c)
+	case store.ScheduleScannerCronJob:
+		var scannerCronjobTemplate types.ScannerCronJobTemplate
+		return bindAndConvert(scannerCronjobTemplate, c)
+	case store.ScheduleStreamsJob:
+		var streamsJobTemplate types.StreamsJobTemplate
+		return bindAndConvert(streamsJobTemplate, c)
+	}
+
+	return "", fmt.Errorf("No template found")
+}
+
+// bindAndConvert binds the JSON request body and returns it as a JSON string.
+// NOTE(review): template arrives as a struct value wrapped in an interface and
+// is bound through &template, so the decoder presumably stores a generic JSON
+// value rather than filling the typed struct - confirm whether typed
+// validation is intended before relying on it.
+func bindAndConvert(template interface{}, c *gin.Context) (string, error) {
+	if c.BindJSON(&template) == nil {
+		return presidio.ConvertInterfaceToJSON(template)
+	}
+	return "", fmt.Errorf("No template found")
+}
+
+// openFile returns the full content of an uploaded multipart file.
+// A zero-size upload yields an empty slice and no error.
+func openFile(header *multipart.FileHeader) ([]byte, error) {
+
+	file, err := header.Open()
+	if err != nil {
+		return nil, err
+	}
+
+	defer file.Close()
+	bt := make([]byte, header.Size)
+	// io.ReadFull keeps reading until bt is full; a single Read call is
+	// allowed to return fewer bytes than requested without an error.
+	_, err = io.ReadFull(file, bt)
+	if err != nil {
+		return nil, err
+	}
+
+	return bt, nil
+}
diff --git a/presidio-api/cmd/presidio-api/templates.go b/presidio-api/cmd/presidio-api/templates.go deleted file mode 100644 index 7582d94f8..000000000 --- a/presidio-api/cmd/presidio-api/templates.go +++ /dev/null @@ -1,112 +0,0 @@ -package main - -import ( - "fmt" - "net/http" - - "github.com/gin-gonic/gin" - - types "github.com/Microsoft/presidio-genproto/golang" - "github.com/Microsoft/presidio/pkg/presidio" - server "github.com/Microsoft/presidio/pkg/server" -) - -func getFieldTypes(c *gin.Context) { - var fieldTypeArray []types.FieldTypes - for key := range types.FieldTypesEnum_value { - fieldTypeArray = append(fieldTypeArray, types.FieldTypes{Name: key}) - } - server.WriteResponse(c, http.StatusOK, fieldTypeArray) -} - -func (api *API) getActionTemplate(c *gin.Context) { - action := c.Param("action") - project := c.Param("project") - id := c.Param("id") - result, err := api.Templates.GetTemplate(project, action, id) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - server.WriteResponse(c, http.StatusOK, result) -} - -func (api *API) postActionTemplate(c *gin.Context) { - action := c.Param("action") - project := c.Param("project") - id := c.Param("id") - value, err := validateTemplate(action, c) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - err = api.Templates.InsertTemplate(project, action, id, value) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - server.WriteResponse(c, http.StatusCreated, "Template added successfully ") -} - -func (api *API) putActionTemplate(c *gin.Context) { - action := c.Param("action") - project := c.Param("project") - id := c.Param("id") - value, err := validateTemplate(action, c) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - err = api.Templates.UpdateTemplate(project, action, id, value) - if err != nil { - server.AbortWithError(c,
http.StatusBadRequest, err) - return - } - - server.WriteResponse(c, http.StatusOK, "Template updated successfully") -} - -func (api *API) deleteActionTemplate(c *gin.Context) { - action := c.Param("action") - project := c.Param("project") - id := c.Param("id") - err := api.Templates.DeleteTemplate(project, action, id) - if err != nil { - server.AbortWithError(c, http.StatusBadRequest, err) - return - } - server.WriteResponse(c, http.StatusNoContent, "") -} - -//TODO: Need to better validate templates -func validateTemplate(action string, c *gin.Context) (string, error) { - switch action { - case analyze: - var analyzerTemplate types.AnalyzeTemplate - return bindAndConvert(analyzerTemplate, c) - case anonymize: - var anonymizeTemplate types.AnonymizeTemplate - return bindAndConvert(anonymizeTemplate, c) - case scan: - var scanTemplate types.ScanTemplate - return bindAndConvert(scanTemplate, c) - case datasink: - var datasinkTemplate types.DatasinkTemplate - return bindAndConvert(datasinkTemplate, c) - case scheduleScannerCronJob: - var scannerCronjobTemplate types.ScannerCronJobTemplate - return bindAndConvert(scannerCronjobTemplate, c) - case scheduleStreamsJob: - var streamsJobTemplate types.StreamsJobTemplate - return bindAndConvert(streamsJobTemplate, c) - } - - return "", fmt.Errorf("No template found") -} - -func bindAndConvert(template interface{}, c *gin.Context) (string, error) { - if c.BindJSON(&template) == nil { - return presidio.ConvertInterfaceToJSON(template) - } - return "", fmt.Errorf("No template found") -} diff --git a/presidio-ocr/Dockerfile b/presidio-ocr/Dockerfile new file mode 100644 index 000000000..fc8996b98 --- /dev/null +++ b/presidio-ocr/Dockerfile @@ -0,0 +1,21 @@ +ARG REGISTRY=presidio.azurecr.io + +FROM ${REGISTRY}/presidio-golang-base AS build-env + +ARG NAME=presidio-ocr +ARG PRESIDIOPATH=${GOPATH}/src/github.com/Microsoft/presidio +ARG VERSION=latest + +WORKDIR ${PRESIDIOPATH}/${NAME}/cmd/${NAME} +RUN GOOS=linux GOARCH=amd64 
CGO_ENABLED=1 go build -ldflags "-X github.com/Microsoft/presidio/pkg/version.Version=${VERSION}" -o /usr/bin/${NAME}
+# GOOS/GOARCH/CGO_ENABLED are given as a prefix of the go build command so they
+# reach its environment; -ldflags is double-quoted so the shell expands ${VERSION}.
+
+#----------------------------
+
+FROM alpine:3.8
+
+RUN apk --update add tesseract-ocr
+
+ARG NAME=presidio-ocr
+WORKDIR /usr/bin/
+COPY --from=build-env /usr/bin/${NAME} /usr/bin/
+CMD /usr/bin/presidio-ocr
\ No newline at end of file
diff --git a/presidio-ocr/cmd/presidio-ocr/main.go b/presidio-ocr/cmd/presidio-ocr/main.go
new file mode 100644
index 000000000..e1bebf559
--- /dev/null
+++ b/presidio-ocr/cmd/presidio-ocr/main.go
@@ -0,0 +1,52 @@
+package main
+
+import (
+	"context"
+	"flag"
+
+	"google.golang.org/grpc/reflection"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+
+	"github.com/spf13/pflag"
+	"github.com/spf13/viper"
+
+	log "github.com/Microsoft/presidio/pkg/logger"
+	"github.com/Microsoft/presidio/pkg/platform"
+	"github.com/Microsoft/presidio/pkg/rpc"
+	"github.com/Microsoft/presidio/presidio-ocr/cmd/presidio-ocr/ocr"
+)
+
+// server implements the OCR gRPC service.
+type server struct{}
+
+// main parses the flags, creates the logger and serves the OCR service over
+// gRPC on the configured port until Serve fails.
+func main() {
+
+	pflag.Int(platform.GrpcPort, 3003, "GRPC listen port")
+	pflag.String("log_level", "info", "Log level - debug/info/warn/error")
+
+	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
+	pflag.Parse()
+	viper.BindPFlags(pflag.CommandLine)
+
+	settings := platform.GetSettings()
+	log.CreateLogger(settings.LogLevel)
+
+	lis, s := rpc.SetupClient(settings.GrpcPort)
+
+	types.RegisterOcrServiceServer(s, &server{})
+	reflection.Register(s)
+	if err := s.Serve(lis); err != nil {
+		log.Fatal(err.Error())
+	}
+
+}
+
+// Apply runs OCR on the request image. On failure the error is logged and
+// returned with a nil response.
+func (s *server) Apply(ctx context.Context, r *types.OcrRequest) (*types.OcrResponse, error) {
+
+	res, err := ocr.PerformOCR(r.Image)
+	if err != nil {
+		log.Error(err.Error())
+		return nil, err
+	}
+
+	return &types.OcrResponse{Image: res}, nil
+}
diff --git a/presidio-ocr/cmd/presidio-ocr/ocr/ocr.go b/presidio-ocr/cmd/presidio-ocr/ocr/ocr.go
new file mode 100644
index 000000000..9d39c513b
--- /dev/null
+++ b/presidio-ocr/cmd/presidio-ocr/ocr/ocr.go
@@ -0,0 +1,119 @@
+package ocr
+
+import (
+	"encoding/xml"
+	"errors"
+	"strconv"
+	"strings"
+
+	"github.com/otiai10/gosseract"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	log "github.com/Microsoft/presidio/pkg/logger"
+)
+
+//HOCR document: the elements found under div>p>span>span of the hOCR output.
+type document struct {
+	Elements []element `xml:"div>p>span>span"`
+}
+
+//element in hocr xml: the attributes and inner XML of one matched span.
+type element struct {
+	Lang      string `xml:"lang,attr"`
+	Direction string `xml:"dir,attr"`
+	Title     string `xml:"title,attr"`
+	ID        string `xml:"id,attr"`
+	Class     string `xml:"class,attr"`
+	Content   string `xml:",innerxml"`
+}
+
+//PerformOCR runs tesseract on image.Data and returns a new Image carrying the
+// recognized text and one bounding box per recognized element. A nil image is
+// rejected with an error.
+func PerformOCR(image *types.Image) (*types.Image, error) {
+	if image == nil {
+		return nil, errors.New("image must be supplied")
+	}
+
+	// Setup tesseract client
+	client := gosseract.NewClient()
+	defer client.Close()
+
+	err := client.SetImageFromBytes(image.Data)
+	if err != nil {
+		return nil, err
+	}
+
+	out, err := client.HOCRText()
+	if err != nil {
+		return nil, err
+	}
+
+	log.Debug(out)
+
+	doc, err := convertHOcrToElements(out)
+	if err != nil {
+		return nil, err
+	}
+
+	ocrImage := convertElementsToLocations(doc)
+
+	return ocrImage, nil
+}
+
+// convertHOcrToElements parses the hOCR text into a document.
+func convertHOcrToElements(hocr string) (document, error) {
+	var doc document
+	// NOTE(review): the header literal is empty here; it looks like markup was
+	// stripped from this copy of the patch - confirm against the repository.
+	header := `` + "\n"
+	err := xml.Unmarshal(([]byte)(header+hocr), &doc)
+	if err != nil {
+		return document{}, err
+	}
+	return doc, nil
+}
+
+// convertElementsToLocations builds the result image from the parsed elements.
+// Text is the element contents joined with a trailing space after each one;
+// every bounding box records its text, its start/end offsets into Text
+// (counted in runes) and the four numbers taken from fields 1-4 of the
+// space-separated title attribute. An element whose title has fewer than five
+// fields keeps its text and offsets and gets a zeroed box.
+func convertElementsToLocations(doc document) *types.Image {
+
+	currentLocation := 0
+
+	image := &types.Image{
+		Boundingboxes: make([]*types.Boundingbox, len(doc.Elements)),
+	}
+
+	var text strings.Builder
+	for i, element := range doc.Elements {
+		content := element.Content
+		// NOTE(review): both replacements have empty old/new strings and so
+		// change nothing; the markup they removed was presumably stripped from
+		// this copy of the patch - confirm against the repository.
+		content = strings.Replace(content, "", "", -1)
+		content = strings.Replace(content, "", "", -1)
+
+		text.WriteString(content)
+		text.WriteString(" ")
+		length := len([]rune(content))
+		bBox := strings.Split(element.Title, " ")
+		if len(bBox) < 5 {
+			// Indexing bBox[1..4] below would panic on a short title.
+			log.Warn("Error parsing bounding box:", element.Title)
+			bBox = []string{"", "0", "0", "0", "0"}
+		}
+
+		xlocation, err := strconv.ParseFloat(bBox[1], 32)
+		if err != nil {
+			log.Warn("Error parsing xlocation:", err.Error())
+		}
+
+		width, err := strconv.ParseFloat(bBox[3], 32)
+		if err != nil {
+			log.Warn("Error parsing width:", err.Error())
+		}
+
+		ylocation, err := strconv.ParseFloat(bBox[2], 32)
+		if err != nil {
+			log.Warn("Error parsing ylocation:", err.Error())
+		}
+
+		height, err := strconv.ParseFloat(strings.TrimRight(bBox[4], ";"), 32)
+		if err != nil {
+			log.Warn("Error parsing height:", err.Error())
+		}
+
+		image.Boundingboxes[i] = &types.Boundingbox{
+			XLocation:     (float32)(xlocation),
+			Width:         (float32)(width),
+			YLocation:     (float32)(ylocation),
+			Height:        (float32)(height),
+			Text:          content,
+			StartPosition: (int32)(currentLocation),
+			EndPosition:   (int32)(currentLocation + length),
+		}
+		// +1 accounts for the separating space appended after each element.
+		currentLocation += length + 1
+	}
+	image.Text = text.String()
+
+	return image
+
+}
diff --git a/tests/functional_test.go b/tests/functional_http_test.go similarity index 51% rename from tests/functional_test.go rename to tests/functional_http_test.go index dc36bf9cb..86a1e3311 100644 --- a/tests/functional_test.go +++ b/tests/functional_http_test.go @@ -4,9 +4,13 @@ package tests import ( "bytes" + "io" "io/ioutil" + "mime/multipart" "net/http" "net/url" + "os" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -42,6 +46,49 @@ func invokeHTTPRequest(t *testing.T, path string, method string, payload []byte) return string(bodyBytes) } +func invokeHTTPUpload(t *testing.T, path string, values map[string]io.Reader) []byte { + + var b bytes.Buffer + w := multipart.NewWriter(&b) + for key, r := range values { + var fw io.Writer + var err error + if x, ok := r.(io.Closer); ok { + defer x.Close() + } + // Add an image file + if x, ok := r.(*os.File); ok { + fw, err = w.CreateFormFile(key, x.Name()) + assert.NoError(t, err) + + } else { + // Add other fields + fw, err = w.CreateFormField(key) + assert.NoError(t, err) + + } + _, err = io.Copy(fw, r) + assert.NoError(t, err) + + } + + w.Close() + + req, err := http.NewRequest("POST", "http://localhost:8080"+path, &b) + assert.NoError(t, err) + req.Header.Set("Content-Type", w.FormDataContentType()) + + res, err := http.DefaultClient.Do(req) + assert.NoError(t, err) + + if
res.StatusCode != http.StatusOK {
+		t.Errorf("bad status: %s", res.Status)
+	}
+	defer res.Body.Close()
+	bodyBytes, _ := ioutil.ReadAll(res.Body)
+	return bodyBytes
+}
+
 func TestAddTemplate(t *testing.T) { payload := generatePayload("analyze-template.json") invokeHTTPRequest(t, "/api/v1/templates/test/analyze/test", "POST", payload) @@ -66,3 +113,24 @@ func TestAnonymizer(t *testing.T) { payload := generatePayload("anonymize-request.json") invokeHTTPRequest(t, "/api/v1/projects/test/anonymize", "POST", payload) }
+
+// TestImageAnonymizer uploads ./testdata/ocr-test.png together with inline
+// analyze and anonymize-image templates to the anonymize-image endpoint and
+// checks that the response has the same byte length as the saved
+// ./testdata/ocr-result.png (lengths are compared, not contents).
+func TestImageAnonymizer(t *testing.T) {
+
+	file, err := os.Open("./testdata/ocr-test.png")
+	assert.NoError(t, err)
+
+	// Multipart form fields; the *os.File value is sent as the file part.
+	payload := map[string]io.Reader{
+		"file":                   file,
+		"analyzeTemplate":        strings.NewReader((string)(generatePayload("analyze-image-template.json"))),
+		"anonymizeImageTemplate": strings.NewReader((string)(generatePayload("anonymize-image-template.json"))),
+		"imageType":              strings.NewReader("image/png"),
+		"detectionType":          strings.NewReader("OCR"),
+	}
+
+	result := invokeHTTPUpload(t, "/api/v1/projects/test/anonymize-image", payload)
+	savedOutputImage, err := ioutil.ReadFile("./testdata/ocr-result.png")
+	assert.NoError(t, err)
+
+	assert.Equal(t, len(savedOutputImage), len(result))
+
+}
diff --git a/tests/integration_storage_scanner_test.go b/tests/functional_storage_scanner_test.go
similarity index 100%
rename from tests/integration_storage_scanner_test.go
rename to tests/functional_storage_scanner_test.go
diff --git a/tests/integration_anonymize_image_test.go b/tests/integration_anonymize_image_test.go
new file mode 100644
index 000000000..1a468680d
--- /dev/null
+++ b/tests/integration_anonymize_image_test.go
@@ -0,0 +1,82 @@
+// +build integration
+
+package tests
+
+import (
+	"encoding/json"
+	"io/ioutil"
+
+	"github.com/stretchr/testify/assert"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/presidio-anonymizer-image/cmd/presidio-anonymizer-image/anonymizer"
+
+	"testing"
+)
+
+func
TestAnonymizeImage(t *testing.T) {
+	// Anonymizes ./testdata/ocr-test.png using the saved OCR output
+	// (./testdata/ocr-result.json) and fixed analyzer locations, then compares
+	// the byte length of the result with the saved ./testdata/ocr-result.png.
+
+	content, err := ioutil.ReadFile("./testdata/ocr-test.png")
+	assert.NoError(t, err)
+
+	jcontent, err := ioutil.ReadFile("./testdata/ocr-result.json")
+	assert.NoError(t, err)
+
+	image := &types.Image{
+		Data:      content,
+		ImageType: "image/png",
+	}
+	// A fixture that fails to decode must fail the test, not be ignored.
+	assert.NoError(t, json.Unmarshal(jcontent, image))
+
+	results := []*types.AnalyzeResult{
+		{
+			Location: &types.Location{
+				Start: 27, End: 40,
+			},
+		},
+		{
+			Location: &types.Location{
+				Start: 294, End: 311,
+			},
+		},
+		{
+			Location: &types.Location{
+				Start: 323, End: 337,
+			},
+		},
+		{
+			Location: &types.Location{
+				Start: 749, End: 771,
+			},
+		},
+		{
+			Location: &types.Location{
+				Start: 758, End: 771,
+			},
+		},
+	}
+
+	template := &types.AnonymizeImageTemplate{
+		FieldTypeGraphics: []*types.FieldTypeGraphic{
+			{
+				Graphic: &types.Graphic{
+					FillColorValue: &types.FillColorValue{
+						Red:   0,
+						Green: 0,
+						Blue:  0,
+					},
+				},
+			},
+		},
+	}
+
+	result, err := anonymizer.AnonymizeImage(image, types.DetectionTypeEnum_OCR, results, template)
+	assert.NoError(t, err)
+	// assert does not stop the test, so return before dereferencing result.
+	if !assert.NotNil(t, result) {
+		return
+	}
+
+	savedOutputImage, err := ioutil.ReadFile("./testdata/ocr-result.png")
+	assert.NoError(t, err)
+
+	assert.Equal(t, len(savedOutputImage), len(result.Data))
+
+}
diff --git a/tests/integration_kafka_test.go b/tests/integration_kafka_test.go index f485efdc8..8866121dd 100644 --- a/tests/integration_kafka_test.go +++ b/tests/integration_kafka_test.go @@ -1,4 +1,4 @@ -// +build functional +// +build integration package tests diff --git a/tests/integration_redis_test.go b/tests/integration_redis_test.go index 7c82dccc5..f33533cad 100644 --- a/tests/integration_redis_test.go +++ b/tests/integration_redis_test.go @@ -1,4 +1,4 @@ -// +build functional +// +build integration package tests
diff --git a/tests/integration_tesseract_test.go b/tests/integration_tesseract_test.go
new file mode 100644
index 000000000..bc0118d0c
--- /dev/null
+++ b/tests/integration_tesseract_test.go
@@ -0,0 +1,35 @@
+// +build integration
+
+package tests
+
+import (
+	"io/ioutil"
+	"testing"
+
+	"encoding/json"
+
+	"github.com/stretchr/testify/assert"
+
+	types "github.com/Microsoft/presidio-genproto/golang"
+	"github.com/Microsoft/presidio/presidio-ocr/cmd/presidio-ocr/ocr"
+)
+
+// TestOCR runs OCR on ./testdata/ocr-test.png and compares the JSON encoding
+// of the result with the saved ./testdata/ocr-result.json.
+func TestOCR(t *testing.T) {
+
+	content, err := ioutil.ReadFile("./testdata/ocr-test.png")
+	assert.NoError(t, err)
+
+	image := &types.Image{
+		Data: content,
+	}
+	result, err := ocr.PerformOCR(image)
+
+	// assert does not stop the test, so return before dereferencing result.
+	if !assert.NoError(t, err) || !assert.NotNil(t, result) {
+		return
+	}
+	assert.NotEqual(t, "", result.Text)
+
+	savedJSONResult, err := ioutil.ReadFile("./testdata/ocr-result.json")
+	assert.NoError(t, err)
+	jsonResult, err := json.Marshal(result)
+	assert.NoError(t, err)
+	assert.Equal(t, savedJSONResult, jsonResult)
+
+}
diff --git a/tests/testdata/analyze-image-template.json b/tests/testdata/analyze-image-template.json new file mode 100644 index 000000000..f348d5c30 --- /dev/null +++ b/tests/testdata/analyze-image-template.json @@ -0,0 +1,15 @@ +{ + "fields": [{ + "name": "PHONE_NUMBER" + }, + { + "name": "DOMAIN_NAME" + }, + { + "name": "EMAIL_ADDRESS" + }, + { + "name": "PERSON" + } + ] +} diff --git a/tests/testdata/anonymize-image-template.json b/tests/testdata/anonymize-image-template.json new file mode 100644 index 000000000..019108247 --- /dev/null +++ b/tests/testdata/anonymize-image-template.json @@ -0,0 +1,11 @@ +{ + "fieldTypeGraphics": [{ + "graphic": { + "fillColorValue": { + "blue": 0, + "red": 0, + "green": 0 + } + } + }] +} diff --git a/tests/testdata/ocr-result.json b/tests/testdata/ocr-result.json new file mode 100755 index 000000000..7b8d200a1 --- /dev/null +++ b/tests/testdata/ocr-result.json @@ -0,0 +1 @@
+{"Boundingboxes":[{"xLocation":27,"width":99,"yLocation":20,"height":51,"text":"This","endPosition":4},{"xLocation":112,"width":229,"yLocation":22,"height":59,"text":"project","startPosition":5,"endPosition":12},{"xLocation":242,"width":267,"yLocation":22,"height":51,"text":"is","startPosition":13,"endPosition":15},{"xLocation":280,"width":407,"yLocation":20,"height":51,"text":"created","startPosition":16,"endPosition":23},{"xLocation":421,"width":459,"yLocation":20,"height":59,"text":"by","startPosition":24,"endPosition":26},{"xLocation":472,"width":563,"yLocation":20,"height":51,"text":"David","startPosition":27,"endPosition":32},{"xLocation":576,"width":723,"yLocation":20,"height":51,"text":"Johnson","startPosition":33,"endPosition":40},{"xLocation":737,"width":798,"yLocation":20,"height":51,"text":"and","startPosition":41,"endPosition":44},{"xLocation":811,"width":985,"yLocation":20,"height":51,"text":"welcomes","startPosition":45,"endPosition":53},{"xLocation":27,"width":253,"yLocation":76,"height":107,"text":"contributions","startPosition":54,"endPosition":67},{"xLocation":267,"width":327,"yLocation":76,"height":107,"text":"and","startPosition":68,"endPosition":71},{"xLocation":341,"width":559,"yLocation":78,"height":114,"text":"suggestions.","startPosition":72,"endPosition":84},{"xLocation":574,"width":659,"yLocation":78,"height":107,"text":"Most","startPosition":85,"endPosition":89},{"xLocation":671,"width":897,"yLocation":76,"height":107,"text":"contributions","startPosition":90,"endPosition":103},{"xLocation":911,"width":1028,"yLocation":78,"height":114,"text":"require","startPosition":104,"endPosition":111},{"xLocation":26,"width":86,"yLocation":141,"height":171,"text":"you","startPosition":112,"endPosition":115},{"xLocation":97,"width":131,"yLocation":136,"height":163,"text":"to","startPosition":116,"endPosition":118},{"xLocation":144,"width":238,"yLocation":141,"height":170,"text":"agree","startPosition":119,"endPosition":124},{"xLocation":249,"width":
282,"yLocation":136,"height":163,"text":"to","startPosition":125,"endPosition":127},{"xLocation":295,"width":312,"yLocation":141,"height":163,"text":"a","startPosition":128,"endPosition":129},{"xLocation":326,"width":519,"yLocation":132,"height":163,"text":"Contributor","startPosition":130,"endPosition":141},{"xLocation":532,"width":662,"yLocation":134,"height":163,"text":"License","startPosition":142,"endPosition":149},{"xLocation":673,"width":862,"yLocation":134,"height":170,"text":"Agreement","startPosition":150,"endPosition":159},{"xLocation":875,"width":970,"yLocation":131,"height":171,"text":"(CLA)","startPosition":160,"endPosition":165},{"xLocation":28,"width":183,"yLocation":188,"height":226,"text":"declaring","startPosition":166,"endPosition":175},{"xLocation":195,"width":262,"yLocation":188,"height":219,"text":"that","startPosition":176,"endPosition":180},{"xLocation":274,"width":333,"yLocation":197,"height":227,"text":"you","startPosition":181,"endPosition":184},{"xLocation":347,"width":405,"yLocation":188,"height":219,"text":"have","startPosition":185,"endPosition":189},{"xLocation":408,"width":490,"yLocation":188,"height":219,"text":"the","startPosition":190,"endPosition":193},{"xLocation":503,"width":578,"yLocation":188,"height":226,"text":"right","startPosition":194,"endPosition":199},{"xLocation":589,"width":630,"yLocation":192,"height":224,"text":"to,","startPosition":200,"endPosition":203},{"xLocation":643,"width":703,"yLocation":188,"height":219,"text":"and","startPosition":204,"endPosition":207},{"xLocation":717,"width":849,"yLocation":188,"height":227,"text":"actually","startPosition":208,"endPosition":216},{"xLocation":861,"width":910,"yLocation":188,"height":224,"text":"do,","startPosition":217,"endPosition":220},{"xLocation":923,"width":1010,"yLocation":192,"height":226,"text":"grant","startPosition":221,"endPosition":226},{"xLocation":28,"width":66,"yLocation":252,"height":274,"text":"us","startPosition":227,"endPosition":229},{"xLocation":7
7,"width":131,"yLocation":243,"height":274,"text":"the","startPosition":230,"endPosition":233},{"xLocation":144,"width":239,"yLocation":243,"height":281,"text":"rights","startPosition":234,"endPosition":240},{"xLocation":250,"width":283,"yLocation":247,"height":274,"text":"to","startPosition":241,"endPosition":243},{"xLocation":297,"width":356,"yLocation":252,"height":274,"text":"use","startPosition":244,"endPosition":247},{"xLocation":367,"width":441,"yLocation":252,"height":282,"text":"your","startPosition":248,"endPosition":252},{"xLocation":453,"width":668,"yLocation":243,"height":274,"text":"contribution.","startPosition":253,"endPosition":266},{"xLocation":683,"width":736,"yLocation":245,"height":274,"text":"For","startPosition":267,"endPosition":270},{"xLocation":748,"width":868,"yLocation":243,"height":279,"text":"details,","startPosition":271,"endPosition":279},{"xLocation":880,"width":949,"yLocation":245,"height":274,"text":"visit","startPosition":280,"endPosition":285},{"xLocation":28,"width":466,"yLocation":299,"height":337,"text":"https://cla.microsoft.com","startPosition":286,"endPosition":311},{"xLocation":479,"width":513,"yLocation":308,"height":330,"text":"or","startPosition":312,"endPosition":314},{"xLocation":525,"width":653,"yLocation":303,"height":330,"text":"contact","startPosition":315,"endPosition":322},{"xLocation":666,"width":754,"yLocation":298,"height":338,"text":"(212)","startPosition":323,"endPosition":328},{"xLocation":769,"width":938,"yLocation":301,"height":330,"text":"555-1234.","startPosition":329,"endPosition":338},{"xLocation":27,"width":102,"yLocation":355,"height":386,"text":"When","startPosition":339,"endPosition":343},{"xLocation":105,"width":193,"yLocation":364,"height":394,"text":"you","startPosition":344,"endPosition":347},{"xLocation":207,"width":324,"yLocation":355,"height":386,"text":"submit","startPosition":348,"endPosition":354},{"xLocation":337,"width":354,"yLocation":364,"height":386,"text":"a","startPosition":355,"
endPosition":356},{"xLocation":368,"width":426,"yLocation":355,"height":393,"text":"pull","startPosition":357,"endPosition":361},{"xLocation":440,"width":575,"yLocation":359,"height":393,"text":"request,","startPosition":362,"endPosition":370},{"xLocation":588,"width":605,"yLocation":364,"height":386,"text":"a","startPosition":371,"endPosition":372},{"xLocation":618,"width":742,"yLocation":355,"height":386,"text":"CLA-bot","startPosition":373,"endPosition":380},{"xLocation":744,"width":821,"yLocation":355,"height":386,"text":"will","startPosition":381,"endPosition":385},{"xLocation":28,"width":259,"yLocation":410,"height":449,"text":"automatically","startPosition":386,"endPosition":399},{"xLocation":271,"width":444,"yLocation":410,"height":441,"text":"determine","startPosition":400,"endPosition":409},{"xLocation":455,"width":593,"yLocation":410,"height":441,"text":"whether","startPosition":410,"endPosition":417},{"xLocation":604,"width":663,"yLocation":419,"height":449,"text":"you","startPosition":418,"endPosition":421},{"xLocation":677,"width":757,"yLocation":410,"height":441,"text":"need","startPosition":422,"endPosition":426},{"xLocation":769,"width":802,"yLocation":414,"height":441,"text":"to","startPosition":427,"endPosition":429},{"xLocation":816,"width":940,"yLocation":410,"height":448,"text":"provide","startPosition":430,"endPosition":437},{"xLocation":953,"width":970,"yLocation":419,"height":441,"text":"a","startPosition":438,"endPosition":439},{"xLocation":984,"width":1054,"yLocation":412,"height":441,"text":"CLA","startPosition":440,"endPosition":443},{"xLocation":28,"width":88,"yLocation":466,"height":497,"text":"and","startPosition":444,"endPosition":447},{"xLocation":102,"width":252,"yLocation":466,"height":497,"text":"decorate","startPosition":448,"endPosition":456},{"xLocation":262,"width":316,"yLocation":466,"height":497,"text":"the","startPosition":457,"endPosition":460},{"xLocation":29,"width":74,"yLocation":524,"height":552,"text":"PR","startPosi
tion":461,"endPosition":463},{"xLocation":86,"width":311,"yLocation":522,"height":561,"text":"appropriately","startPosition":464,"endPosition":477},{"xLocation":323,"width":404,"yLocation":521,"height":561,"text":"(e.g.,","startPosition":478,"endPosition":484},{"xLocation":417,"width":503,"yLocation":522,"height":558,"text":"label,","startPosition":485,"endPosition":491},{"xLocation":516,"width":702,"yLocation":521,"height":561,"text":"comment).","startPosition":492,"endPosition":501},{"xLocation":715,"width":830,"yLocation":522,"height":561,"text":"Simply","startPosition":502,"endPosition":508},{"xLocation":841,"width":946,"yLocation":522,"height":553,"text":"follow","startPosition":509,"endPosition":515},{"xLocation":956,"width":1010,"yLocation":522,"height":553,"text":"the","startPosition":516,"endPosition":519},{"xLocation":29,"width":229,"yLocation":579,"height":608,"text":"instructions","startPosition":520,"endPosition":532},{"xLocation":243,"width":388,"yLocation":577,"height":615,"text":"provided","startPosition":533,"endPosition":541},{"xLocation":402,"width":439,"yLocation":577,"height":616,"text":"by","startPosition":542,"endPosition":544},{"xLocation":450,"width":504,"yLocation":577,"height":608,"text":"the","startPosition":545,"endPosition":548},{"xLocation":517,"width":578,"yLocation":577,"height":608,"text":"bot.","startPosition":549,"endPosition":553},{"xLocation":591,"width":655,"yLocation":579,"height":608,"text":"You","startPosition":554,"endPosition":557},{"xLocation":667,"width":722,"yLocation":577,"height":607,"text":"will","startPosition":558,"endPosition":562},{"xLocation":735,"width":805,"yLocation":577,"height":616,"text":"only","startPosition":563,"endPosition":567},{"xLocation":818,"width":897,"yLocation":577,"height":608,"text":"need","startPosition":568,"endPosition":572},{"xLocation":910,"width":943,"yLocation":581,"height":608,"text":"to","startPosition":573,"endPosition":575},{"xLocation":956,"width":997,"yLocation":577,"height":608,
"text":"do","startPosition":576,"endPosition":578},{"xLocation":26,"width":88,"yLocation":633,"height":664,"text":"this","startPosition":579,"endPosition":583},{"xLocation":101,"width":183,"yLocation":642,"height":664,"text":"once","startPosition":584,"endPosition":588},{"xLocation":196,"width":289,"yLocation":642,"height":664,"text":"across","startPosition":589,"endPosition":595},{"xLocation":292,"width":358,"yLocation":633,"height":664,"text":"all","startPosition":596,"endPosition":599},{"xLocation":373,"width":466,"yLocation":642,"height":671,"text":"repos","startPosition":600,"endPosition":605},{"xLocation":480,"width":570,"yLocation":635,"height":671,"text":"using","startPosition":606,"endPosition":611},{"xLocation":583,"width":638,"yLocation":642,"height":664,"text":"our","startPosition":612,"endPosition":615},{"xLocation":650,"width":734,"yLocation":626,"height":673,"text":"CLA|","startPosition":616,"endPosition":620},{"xLocation":27,"width":99,"yLocation":745,"height":776,"text":"This","startPosition":621,"endPosition":625},{"xLocation":112,"width":229,"yLocation":747,"height":784,"text":"project","startPosition":626,"endPosition":633},{"xLocation":242,"width":301,"yLocation":745,"height":776,"text":"has","startPosition":634,"endPosition":637},{"xLocation":314,"width":452,"yLocation":745,"height":783,"text":"adopted","startPosition":638,"endPosition":645},{"xLocation":464,"width":518,"yLocation":745,"height":776,"text":"the","startPosition":646,"endPosition":649},{"xLocation":532,"width":696,"yLocation":745,"height":776,"text":"Microsoft","startPosition":650,"endPosition":659},{"xLocation":708,"width":795,"yLocation":747,"height":783,"text":"Open","startPosition":660,"endPosition":664},{"xLocation":808,"width":927,"yLocation":747,"height":776,"text":"Source","startPosition":665,"endPosition":671},{"xLocation":939,"width":1027,"yLocation":745,"height":776,"text":"Code","startPosition":672,"endPosition":676},{"xLocation":27,"width":62,"yLocation":800,"height":
831,"text":"of","startPosition":677,"endPosition":679},{"xLocation":73,"width":225,"yLocation":800,"height":831,"text":"Conduct.","startPosition":680,"endPosition":688},{"xLocation":240,"width":293,"yLocation":802,"height":831,"text":"For","startPosition":689,"endPosition":692},{"xLocation":306,"width":392,"yLocation":809,"height":831,"text":"more","startPosition":693,"endPosition":697},{"xLocation":406,"width":602,"yLocation":800,"height":831,"text":"information","startPosition":698,"endPosition":709},{"xLocation":616,"width":674,"yLocation":809,"height":831,"text":"see","startPosition":710,"endPosition":713},{"xLocation":685,"width":739,"yLocation":800,"height":831,"text":"the","startPosition":714,"endPosition":717},{"xLocation":751,"width":839,"yLocation":800,"height":831,"text":"Code","startPosition":718,"endPosition":722},{"xLocation":851,"width":886,"yLocation":800,"height":831,"text":"of","startPosition":723,"endPosition":725},{"xLocation":897,"width":1040,"yLocation":800,"height":831,"text":"Conduct","startPosition":726,"endPosition":733},{"xLocation":29,"width":94,"yLocation":858,"height":891,"text":"FAQ","startPosition":734,"endPosition":737},{"xLocation":107,"width":141,"yLocation":865,"height":887,"text":"or","startPosition":738,"endPosition":740},{"xLocation":152,"width":281,"yLocation":860,"height":887,"text":"contact","startPosition":741,"endPosition":748},{"xLocation":27,"width":485,"yLocation":912,"height":951,"text":"opencode@microsoft.com","startPosition":749,"endPosition":771},{"xLocation":498,"width":568,"yLocation":912,"height":943,"text":"with","startPosition":772,"endPosition":776},{"xLocation":582,"width":640,"yLocation":921,"height":951,"text":"any","startPosition":777,"endPosition":780},{"xLocation":652,"width":819,"yLocation":912,"height":943,"text":"additional","startPosition":781,"endPosition":791},{"xLocation":833,"width":1000,"yLocation":914,"height":950,"text":"questions","startPosition":792,"endPosition":801},{"xLocation":1013,"widt
h":1047,"yLocation":921,"height":943,"text":"or","startPosition":802,"endPosition":804},{"xLocation":27,"width":219,"yLocation":971,"height":998,"text":"comments.","startPosition":805,"endPosition":814}],"text":"This project is created by David Johnson and welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com or contact (212) 555-1234. When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA| This project has adopted the Microsoft Open Source Code of Conduct. For more information see the Code of Conduct FAQ or contact opencode@microsoft.com with any additional questions or comments. "} \ No newline at end of file diff --git a/tests/testdata/ocr-result.png b/tests/testdata/ocr-result.png new file mode 100644 index 000000000..6181d696f Binary files /dev/null and b/tests/testdata/ocr-result.png differ diff --git a/tests/testdata/ocr-test.png b/tests/testdata/ocr-test.png new file mode 100644 index 000000000..61ae83395 Binary files /dev/null and b/tests/testdata/ocr-test.png differ