From ea95dfda7676951ba63f5474e21ccffd3a720707 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:21:40 +0100 Subject: [PATCH] :sparkles: add support for business cards, delivery notes, indian passport & update resume (#272) --- .../code_samples/business_card_v1_async.txt | 16 + .../code_samples/delivery_notes_v1_async.txt | 16 + .../code_samples/ind_passport_v1_async.txt | 16 + docs/extras/guide/business_card_v1.md | 167 +++++++++++ docs/extras/guide/delivery_notes_v1.md | 142 +++++++++ docs/extras/guide/financial_document_v1.md | 8 +- docs/extras/guide/ind_passport_v1.md | 280 ++++++++++++++++++ docs/extras/guide/invoices_v4.md | 16 +- docs/extras/guide/resume_v1.md | 33 ++- docs/product/business_card_v1.rst | 15 + docs/product/delivery_note_v1.rst | 15 + docs/product/ind/indian_passport_v1.rst | 15 + mindee/client.py | 16 +- mindee/product/__init__.py | 4 +- mindee/product/business_card/__init__.py | 4 + .../product/business_card/business_card_v1.py | 37 +++ .../business_card_v1_document.py | 105 +++++++ mindee/product/delivery_note/__init__.py | 4 + .../product/delivery_note/delivery_note_v1.py | 37 +++ .../delivery_note_v1_document.py | 74 +++++ .../financial_document_v1_document.py | 2 +- mindee/product/ind/__init__.py | 4 + .../product/ind/indian_passport/__init__.py | 4 + .../ind/indian_passport/indian_passport_v1.py | 37 +++ .../indian_passport_v1_document.py | 186 ++++++++++++ mindee/product/invoice/invoice_v4_document.py | 2 +- mindee/product/resume/resume_v1_document.py | 4 +- .../resume_v1_professional_experience.py | 7 + tests/data | 2 +- tests/product/business_card/__init__.py | 0 .../business_card/test_business_card_v1.py | 56 ++++ tests/product/delivery_note/__init__.py | 0 .../delivery_note/test_delivery_note_v1.py | 52 ++++ tests/product/ind/indian_passport/__init__.py | 0 .../test_indian_passport_v1.py | 66 +++++ 35 files changed, 1401 insertions(+), 41 deletions(-) create 
mode 100644 docs/extras/code_samples/business_card_v1_async.txt create mode 100644 docs/extras/code_samples/delivery_notes_v1_async.txt create mode 100644 docs/extras/code_samples/ind_passport_v1_async.txt create mode 100644 docs/extras/guide/business_card_v1.md create mode 100644 docs/extras/guide/delivery_notes_v1.md create mode 100644 docs/extras/guide/ind_passport_v1.md create mode 100644 docs/product/business_card_v1.rst create mode 100644 docs/product/delivery_note_v1.rst create mode 100644 docs/product/ind/indian_passport_v1.rst create mode 100644 mindee/product/business_card/__init__.py create mode 100644 mindee/product/business_card/business_card_v1.py create mode 100644 mindee/product/business_card/business_card_v1_document.py create mode 100644 mindee/product/delivery_note/__init__.py create mode 100644 mindee/product/delivery_note/delivery_note_v1.py create mode 100644 mindee/product/delivery_note/delivery_note_v1_document.py create mode 100644 mindee/product/ind/__init__.py create mode 100644 mindee/product/ind/indian_passport/__init__.py create mode 100644 mindee/product/ind/indian_passport/indian_passport_v1.py create mode 100644 mindee/product/ind/indian_passport/indian_passport_v1_document.py create mode 100644 tests/product/business_card/__init__.py create mode 100644 tests/product/business_card/test_business_card_v1.py create mode 100644 tests/product/delivery_note/__init__.py create mode 100644 tests/product/delivery_note/test_delivery_note_v1.py create mode 100644 tests/product/ind/indian_passport/__init__.py create mode 100644 tests/product/ind/indian_passport/test_indian_passport_v1.py diff --git a/docs/extras/code_samples/business_card_v1_async.txt b/docs/extras/code_samples/business_card_v1_async.txt new file mode 100644 index 00000000..43882dc6 --- /dev/null +++ b/docs/extras/code_samples/business_card_v1_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = 
Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. +result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.BusinessCardV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/code_samples/delivery_notes_v1_async.txt b/docs/extras/code_samples/delivery_notes_v1_async.txt new file mode 100644 index 00000000..def25556 --- /dev/null +++ b/docs/extras/code_samples/delivery_notes_v1_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. +result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.DeliveryNoteV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/code_samples/ind_passport_v1_async.txt b/docs/extras/code_samples/ind_passport_v1_async.txt new file mode 100644 index 00000000..c7ce4e15 --- /dev/null +++ b/docs/extras/code_samples/ind_passport_v1_async.txt @@ -0,0 +1,16 @@ +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.ind.IndianPassportV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) diff --git a/docs/extras/guide/business_card_v1.md b/docs/extras/guide/business_card_v1.md new file mode 100644 index 00000000..0c7d1e5a --- /dev/null +++ b/docs/extras/guide/business_card_v1.md @@ -0,0 +1,167 @@ +--- +title: Business Card OCR Python +category: 622b805aaec68102ea7fcbc2 +slug: python-business-card-ocr +parentDoc: 609808f773b0b90051d839de +--- +The Python OCR SDK supports the [Business Card API](https://platform.mindee.com/mindee/business_card). + +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/business_card/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. +![Business Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/business_card/default_sample.jpg?raw=true) + +# Quick-Start +```py +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.BusinessCardV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) + +``` + +**Output (RST):** +```rst +######## +Document +######## +:Mindee ID: 6f9a261f-7609-4687-9af0-46a45156566e +:Filename: default_sample.jpg + +Inference +######### +:Product: mindee/business_card v1.0 +:Rotation applied: Yes + +Prediction +========== +:Firstname: Andrew +:Lastname: Morin +:Job Title: Founder & CEO +:Company: RemoteGlobal +:Email: amorin@remoteglobalconsulting.com +:Phone Number: +14015555555 +:Mobile Number: +13015555555 +:Fax Number: +14015555556 +:Address: 178 Main Avenue, Providence, RI 02111 +:Website: www.remoteglobalconsulting.com +:Social Media: https://www.linkedin.com/in/johndoe + https://twitter.com/johndoe +``` + +# Field Types +## Standard Fields +These fields are generic and used in several products. + +### BaseField +Each prediction object contains a set of fields that inherit from the generic `BaseField` class. +A typical `BaseField` object will have the following attributes: + +* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. +* **confidence** (`float`): the confidence score of the field prediction. +* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. +* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. +* **page_id** (`int`): the ID of the page, always `None` when at document-level. +* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). + +> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). 
+ + +Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. + +### StringField +The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. + +# Attributes +The following fields are extracted for Business Card V1: + +## Address +**address** ([StringField](#stringfield)): The address of the person. + +```py +print(result.document.inference.prediction.address.value) +``` + +## Company +**company** ([StringField](#stringfield)): The company the person works for. + +```py +print(result.document.inference.prediction.company.value) +``` + +## Email +**email** ([StringField](#stringfield)): The email address of the person. + +```py +print(result.document.inference.prediction.email.value) +``` + +## Fax Number +**fax_number** ([StringField](#stringfield)): The Fax number of the person. + +```py +print(result.document.inference.prediction.fax_number.value) +``` + +## Firstname +**firstname** ([StringField](#stringfield)): The given name of the person. + +```py +print(result.document.inference.prediction.firstname.value) +``` + +## Job Title +**job_title** ([StringField](#stringfield)): The job title of the person. + +```py +print(result.document.inference.prediction.job_title.value) +``` + +## Lastname +**lastname** ([StringField](#stringfield)): The lastname of the person. + +```py +print(result.document.inference.prediction.lastname.value) +``` + +## Mobile Number +**mobile_number** ([StringField](#stringfield)): The mobile number of the person. + +```py +print(result.document.inference.prediction.mobile_number.value) +``` + +## Phone Number +**phone_number** ([StringField](#stringfield)): The phone number of the person. + +```py +print(result.document.inference.prediction.phone_number.value) +``` + +## Social Media +**social_media** (List[[StringField](#stringfield)]): The social media profiles of the person or company. 
+ +```py +for social_media_elem in result.document.inference.prediction.social_media: + print(social_media_elem.value) +``` + +## Website +**website** ([StringField](#stringfield)): The website of the person or company. + +```py +print(result.document.inference.prediction.website.value) +``` + +# Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/delivery_notes_v1.md b/docs/extras/guide/delivery_notes_v1.md new file mode 100644 index 00000000..db65e431 --- /dev/null +++ b/docs/extras/guide/delivery_notes_v1.md @@ -0,0 +1,142 @@ +--- +title: Delivery note OCR Python +category: 622b805aaec68102ea7fcbc2 +slug: python-delivery-note-ocr +parentDoc: 609808f773b0b90051d839de +--- +The Python OCR SDK supports the [Delivery note API](https://platform.mindee.com/mindee/delivery_notes). + +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/delivery_notes/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. +![Delivery note sample](https://github.com/mindee/client-lib-test-data/blob/main/products/delivery_notes/default_sample.jpg?raw=true) + +# Quick-Start +```py +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. 
+result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.DeliveryNoteV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) + +``` + +**Output (RST):** +```rst +######## +Document +######## +:Mindee ID: d5ead821-edec-4d31-a69a-cf3998d9a506 +:Filename: default_sample.jpg + +Inference +######### +:Product: mindee/delivery_notes v1.0 +:Rotation applied: Yes + +Prediction +========== +:Delivery Date: 2019-10-02 +:Delivery Number: INT-001 +:Supplier Name: John Smith +:Supplier Address: 4490 Oak Drive, Albany, NY 12210 +:Customer Name: Jessie M Horne +:Customer Address: 4312 Wood Road, New York, NY 10031 +:Total Amount: 204.75 +``` + +# Field Types +## Standard Fields +These fields are generic and used in several products. + +### BaseField +Each prediction object contains a set of fields that inherit from the generic `BaseField` class. +A typical `BaseField` object will have the following attributes: + +* **value** (`Union[float, str]`): corresponds to the field value. Can be `None` if no value was extracted. +* **confidence** (`float`): the confidence score of the field prediction. +* **bounding_box** (`[Point, Point, Point, Point]`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document. +* **polygon** (`List[Point]`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image. +* **page_id** (`int`): the ID of the page, always `None` when at document-level. +* **reconstructed** (`bool`): indicates whether an object was reconstructed (not extracted as the API gave it). + +> **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). + + +Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. + + +### AmountField +The amount field `AmountField` only has one constraint: its **value** is an `Optional[float]`. 
+ +### DateField +Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: + +* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. + +### StringField +The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. + +# Attributes +The following fields are extracted for Delivery note V1: + +## Customer Address +**customer_address** ([StringField](#stringfield)): The address of the customer receiving the goods. + +```py +print(result.document.inference.prediction.customer_address.value) +``` + +## Customer Name +**customer_name** ([StringField](#stringfield)): The name of the customer receiving the goods. + +```py +print(result.document.inference.prediction.customer_name.value) +``` + +## Delivery Date +**delivery_date** ([DateField](#datefield)): The date on which the delivery is scheduled to arrive. + +```py +print(result.document.inference.prediction.delivery_date.value) +``` + +## Delivery Number +**delivery_number** ([StringField](#stringfield)): A unique identifier for the delivery note. + +```py +print(result.document.inference.prediction.delivery_number.value) +``` + +## Supplier Address +**supplier_address** ([StringField](#stringfield)): The address of the supplier providing the goods. + +```py +print(result.document.inference.prediction.supplier_address.value) +``` + +## Supplier Name +**supplier_name** ([StringField](#stringfield)): The name of the supplier providing the goods. + +```py +print(result.document.inference.prediction.supplier_name.value) +``` + +## Total Amount +**total_amount** ([AmountField](#amountfield)): The total monetary value of the goods being delivered. + +```py +print(result.document.inference.prediction.total_amount.value) +``` + +# Questions? 
+[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/financial_document_v1.md b/docs/extras/guide/financial_document_v1.md index 04b11bd2..284ae04c 100644 --- a/docs/extras/guide/financial_document_v1.md +++ b/docs/extras/guide/financial_document_v1.md @@ -58,17 +58,17 @@ print(result.document) ######## Document ######## -:Mindee ID: 340ee4ae-b4da-41f0-b5ea-81ae29852b57 +:Mindee ID: b26161ce-35d0-4984-b1ff-886645e160e6 :Filename: default_sample.jpg Inference ######### -:Product: mindee/financial_document v1.10 +:Product: mindee/financial_document v1.11 :Rotation applied: Yes Prediction ========== -:Locale: en; en; USD; +:Locale: en-US; en; US; USD; :Invoice Number: INT-001 :Purchase Order Number: 2412/2019 :Receipt Number: @@ -120,7 +120,7 @@ Page Predictions Page 0 ------ -:Locale: en; en; USD; +:Locale: en-US; en; US; USD; :Invoice Number: INT-001 :Purchase Order Number: 2412/2019 :Receipt Number: diff --git a/docs/extras/guide/ind_passport_v1.md b/docs/extras/guide/ind_passport_v1.md new file mode 100644 index 00000000..40aa8fd4 --- /dev/null +++ b/docs/extras/guide/ind_passport_v1.md @@ -0,0 +1,280 @@ +--- +title: IND Passport - India OCR Python +category: 622b805aaec68102ea7fcbc2 +slug: python-ind-passport---india-ocr +parentDoc: 609808f773b0b90051d839de +--- +The Python OCR SDK supports the [Passport - India API](https://platform.mindee.com/mindee/ind_passport). + +Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/ind_passport/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK. 
+![Passport - India sample](https://github.com/mindee/client-lib-test-data/blob/main/products/ind_passport/default_sample.jpg?raw=true) + +# Quick-Start +```py +from mindee import Client, product, AsyncPredictResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Load a file from disk and enqueue it. +result: AsyncPredictResponse = mindee_client.enqueue_and_parse( + product.ind.IndianPassportV1, + input_doc, +) + +# Print a brief summary of the parsed data +print(result.document) + +``` + +**Output (RST):** +```rst +######## +Document +######## +:Mindee ID: cf88fd43-eaa1-497a-ba29-a9569a4edaa7 +:Filename: default_sample.jpg + +Inference +######### +:Product: mindee/ind_passport v1.0 +:Rotation applied: Yes + +Prediction +========== +:Page Number: 1 +:Country: IND +:ID Number: J8369854 +:Given Names: JOCELYN MICHELLE +:Surname: DOE +:Birth Date: 1959-09-23 +:Birth Place: GUNDUGOLANU +:Issuance Place: HYDERABAD +:Gender: F +:Issuance Date: 2011-10-11 +:Expiry Date: 2021-10-10 +:MRZ Line 1: P **Note:** A `Point` simply refers to a List of two numbers (`[float, float]`). + + +Aside from the previous attributes, all basic fields have access to a custom `__str__` method that can be used to print their value as a string. + + +### ClassificationField +The classification field `ClassificationField` does not implement all the basic `BaseField` attributes. It only implements **value**, **confidence** and **page_id**. + +> Note: a classification field's `value` is always a `str`. + +### DateField +Aside from the basic `BaseField` attributes, the date field `DateField` also implements the following: + +* **date_object** (`Date`): an accessible representation of the value as a python object. Can be `None`. + +### StringField +The text field `StringField` only has one constraint: its **value** is an `Optional[str]`. 
+ +# Attributes +The following fields are extracted for Passport - India V1: + +## Address Line 1 +**address1** ([StringField](#stringfield)): The first line of the address of the passport holder. + +```py +print(result.document.inference.prediction.address1.value) +``` + +## Address Line 2 +**address2** ([StringField](#stringfield)): The second line of the address of the passport holder. + +```py +print(result.document.inference.prediction.address2.value) +``` + +## Address Line 3 +**address3** ([StringField](#stringfield)): The third line of the address of the passport holder. + +```py +print(result.document.inference.prediction.address3.value) +``` + +## Birth Date +**birth_date** ([DateField](#datefield)): The birth date of the passport holder, ISO format: YYYY-MM-DD. + +```py +print(result.document.inference.prediction.birth_date.value) +``` + +## Birth Place +**birth_place** ([StringField](#stringfield)): The birth place of the passport holder. + +```py +print(result.document.inference.prediction.birth_place.value) +``` + +## Country +**country** ([StringField](#stringfield)): ISO 3166-1 alpha-3 country code (3 letters format). + +```py +print(result.document.inference.prediction.country.value) +``` + +## Expiry Date +**expiry_date** ([DateField](#datefield)): The date when the passport will expire, ISO format: YYYY-MM-DD. + +```py +print(result.document.inference.prediction.expiry_date.value) +``` + +## File Number +**file_number** ([StringField](#stringfield)): The file number of the passport document. + +```py +print(result.document.inference.prediction.file_number.value) +``` + +## Gender +**gender** ([ClassificationField](#classificationfield)): The gender of the passport holder. + +#### Possible values include: + - M + - F + +```py +print(result.document.inference.prediction.gender.value) +``` + +## Given Names +**given_names** ([StringField](#stringfield)): The given names of the passport holder. 
+ +```py +print(result.document.inference.prediction.given_names.value) +``` + +## ID Number +**id_number** ([StringField](#stringfield)): The identification number of the passport document. + +```py +print(result.document.inference.prediction.id_number.value) +``` + +## Issuance Date +**issuance_date** ([DateField](#datefield)): The date when the passport was issued, ISO format: YYYY-MM-DD. + +```py +print(result.document.inference.prediction.issuance_date.value) +``` + +## Issuance Place +**issuance_place** ([StringField](#stringfield)): The place where the passport was issued. + +```py +print(result.document.inference.prediction.issuance_place.value) +``` + +## Legal Guardian +**legal_guardian** ([StringField](#stringfield)): The name of the legal guardian of the passport holder (if applicable). + +```py +print(result.document.inference.prediction.legal_guardian.value) +``` + +## MRZ Line 1 +**mrz1** ([StringField](#stringfield)): The first line of the machine-readable zone (MRZ) of the passport document. + +```py +print(result.document.inference.prediction.mrz1.value) +``` + +## MRZ Line 2 +**mrz2** ([StringField](#stringfield)): The second line of the machine-readable zone (MRZ) of the passport document. + +```py +print(result.document.inference.prediction.mrz2.value) +``` + +## Name of Mother +**name_of_mother** ([StringField](#stringfield)): The name of the mother of the passport holder. + +```py +print(result.document.inference.prediction.name_of_mother.value) +``` + +## Name of Spouse +**name_of_spouse** ([StringField](#stringfield)): The name of the spouse of the passport holder (if applicable). + +```py +print(result.document.inference.prediction.name_of_spouse.value) +``` + +## Old Passport Date of Issue +**old_passport_date_of_issue** ([DateField](#datefield)): The date of issue of the old passport (if applicable), ISO format: YYYY-MM-DD. 
+ +```py +print(result.document.inference.prediction.old_passport_date_of_issue.value) +``` + +## Old Passport Number +**old_passport_number** ([StringField](#stringfield)): The number of the old passport (if applicable). + +```py +print(result.document.inference.prediction.old_passport_number.value) +``` + +## Old Passport Place of Issue +**old_passport_place_of_issue** ([StringField](#stringfield)): The place of issue of the old passport (if applicable). + +```py +print(result.document.inference.prediction.old_passport_place_of_issue.value) +``` + +## Page Number +**page_number** ([ClassificationField](#classificationfield)): The page number of the passport document. + +#### Possible values include: + - 1 + - 2 + +```py +print(result.document.inference.prediction.page_number.value) +``` + +## Surname +**surname** ([StringField](#stringfield)): The surname of the passport holder. + +```py +print(result.document.inference.prediction.surname.value) +``` + +# Questions? +[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g) diff --git a/docs/extras/guide/invoices_v4.md b/docs/extras/guide/invoices_v4.md index 2a8b1a3e..b54546a2 100644 --- a/docs/extras/guide/invoices_v4.md +++ b/docs/extras/guide/invoices_v4.md @@ -58,23 +58,23 @@ print(result.document) ######## Document ######## -:Mindee ID: 651b6417-bc50-496e-aa81-207219f0b9f4 +:Mindee ID: a67b70ea-4b1e-4eac-ae75-dda47a7064ae :Filename: default_sample.jpg Inference ######### -:Product: mindee/invoices v4.8 +:Product: mindee/invoices v4.9 :Rotation applied: Yes Prediction ========== -:Locale: en; en; CAD; +:Locale: en-CA; en; CA; CAD; :Invoice Number: 14 :Purchase Order Number: AD29094 :Reference Numbers: AD29094 :Purchase Date: 2018-09-25 -:Due Date: -:Payment Date: +:Due Date: 2011-12-01 +:Payment Date: 2011-12-01 :Total Net: 2145.00 :Total Amount: 2608.20 :Total Tax: 193.20 @@ -114,13 +114,13 @@ Page Predictions Page 0 ------ -:Locale: en; en; CAD; +:Locale: 
en-CA; en; CA; CAD; :Invoice Number: 14 :Purchase Order Number: AD29094 :Reference Numbers: AD29094 :Purchase Date: 2018-09-25 -:Due Date: -:Payment Date: +:Due Date: 2011-12-01 +:Payment Date: 2011-12-01 :Total Net: 2145.00 :Total Amount: 2608.20 :Total Tax: 193.20 diff --git a/docs/extras/guide/resume_v1.md b/docs/extras/guide/resume_v1.md index fa2b2682..8186aedd 100644 --- a/docs/extras/guide/resume_v1.md +++ b/docs/extras/guide/resume_v1.md @@ -35,13 +35,13 @@ print(result.document) ######## Document ######## -:Mindee ID: bc80bae0-af75-4464-95a9-2419403c75bf +:Mindee ID: 9daa3085-152c-454e-9245-636f13fc9dc3 :Filename: default_sample.jpg Inference ######### -:Product: mindee/resume v1.0 -:Rotation applied: No +:Product: mindee/resume v1.1 +:Rotation applied: Yes Prediction ========== @@ -51,8 +51,8 @@ Prediction :Surnames: Morgan :Nationality: :Email Address: christoper.m@gmail.com -:Phone Number: +44 (0) 20 7666 8555 -:Address: 177 Great Portland Street, London W5W 6PQ +:Phone Number: +44 (0)20 7666 8555 +:Address: 177 Great Portland Street, London, W5W 6PQ :Social Networks: +----------------------+----------------------------------------------------+ | Name | URL | @@ -69,38 +69,37 @@ Prediction +----------+----------------------+ | ZHO | Beginner | +----------+----------------------+ - | DEU | Intermediate | + | DEU | Beginner | +----------+----------------------+ :Hard Skills: HTML5 PHP OOP JavaScript CSS MySQL + SQL :Soft Skills: Project management + Creative design Strong decision maker Innovative Complex problem solver - Creative design Service-focused :Education: +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+ | Domain | Degree | End Month | End Year | School | Start Month | Start Year | +=================+===========================+===========+==========+===========================+=============+============+ - | Computer Inf... 
| Bachelor | | | Columbia University, NY | | 2014 | + | Computer Inf... | Bachelor | | 2014 | Columbia University, NY | | | +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+ :Professional Experiences: - +-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+ - | Contract Type | Department | Employer | End Month | End Year | Role | Start Month | Start Year | - +=================+============+===========================+===========+==========+======================+=============+============+ - | Full-Time | | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 | - +-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+ + +-----------------+------------+--------------------------------------+---------------------------+-----------+----------+----------------------+-------------+------------+ + | Contract Type | Department | Description | Employer | End Month | End Year | Role | Start Month | Start Year | + +=================+============+======================================+===========================+===========+==========+======================+=============+============+ + | | | Cooperate with designers to creat... | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 | + +-----------------+------------+--------------------------------------+---------------------------+-----------+----------+----------------------+-------------+------------+ :Certificates: +------------+--------------------------------+---------------------------+------+ | Grade | Name | Provider | Year | +============+================================+===========================+======+ - | | PHP Framework (certificate)... 
| | 2014 | + +------------+--------------------------------+---------------------------+------+ - | | Programming Languages: Java... | | | + | | PHP Framework (certificate)... | | | +------------+--------------------------------+---------------------------+------+ ``` @@ -170,6 +169,7 @@ A `ResumeV1Language` implements the following attributes: * **level** (`str`): The candidate's level for the language. #### Possible values include: + - Native - Fluent - Proficient - Intermediate @@ -191,6 +191,7 @@ A `ResumeV1ProfessionalExperience` implements the following attributes: - Freelance * **department** (`str`): The specific department or division within the company. +* **description** (`str`): The description of the professional experience as written in the document. * **employer** (`str`): The name of the company or organization. * **end_month** (`str`): The month when the professional experience ended. * **end_year** (`str`): The year when the professional experience ended. diff --git a/docs/product/business_card_v1.rst b/docs/product/business_card_v1.rst new file mode 100644 index 00000000..7fa9c2d9 --- /dev/null +++ b/docs/product/business_card_v1.rst @@ -0,0 +1,15 @@ +Business Card V1 +---------------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/business_card_v1_async.txt + :language: Python + +.. autoclass:: mindee.product.business_card.business_card_v1.BusinessCardV1 + :members: + :inherited-members: + +.. autoclass:: mindee.product.business_card.business_card_v1_document.BusinessCardV1Document + :members: + :inherited-members: diff --git a/docs/product/delivery_note_v1.rst b/docs/product/delivery_note_v1.rst new file mode 100644 index 00000000..876ce41b --- /dev/null +++ b/docs/product/delivery_note_v1.rst @@ -0,0 +1,15 @@ +Delivery note V1 +---------------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/delivery_notes_v1_async.txt + :language: Python + +.. 
autoclass:: mindee.product.delivery_note.delivery_note_v1.DeliveryNoteV1 + :members: + :inherited-members: + +.. autoclass:: mindee.product.delivery_note.delivery_note_v1_document.DeliveryNoteV1Document + :members: + :inherited-members: diff --git a/docs/product/ind/indian_passport_v1.rst b/docs/product/ind/indian_passport_v1.rst new file mode 100644 index 00000000..d6aa690d --- /dev/null +++ b/docs/product/ind/indian_passport_v1.rst @@ -0,0 +1,15 @@ +Passport - India V1 +------------------- + +**Sample Code:** + +.. literalinclude:: /extras/code_samples/ind_passport_v1_async.txt + :language: Python + +.. autoclass:: mindee.product.ind.indian_passport.indian_passport_v1.IndianPassportV1 + :members: + :inherited-members: + +.. autoclass:: mindee.product.ind.indian_passport.indian_passport_v1_document.IndianPassportV1Document + :members: + :inherited-members: diff --git a/mindee/client.py b/mindee/client.py index 75bdce76..ca2a0560 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -234,15 +234,15 @@ def _validate_async_params( self, initial_delay_sec: float, delay_sec: float, max_retries: int ) -> None: min_delay = 1 - min_initial_delay = 2 - min_retries = 2 + min_initial_delay = 1 + min_retries = 1 if delay_sec < min_delay: raise MindeeClientError( - f"Cannot set auto-parsing delay to less than {min_delay} seconds." + f"Cannot set auto-parsing delay to less than {min_delay} second(s)." ) if initial_delay_sec < min_initial_delay: raise MindeeClientError( - f"Cannot set initial parsing delay to less than {min_initial_delay} seconds." + f"Cannot set initial parsing delay to less than {min_initial_delay} second(s)." 
) if max_retries < min_retries: raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") @@ -256,8 +256,8 @@ def enqueue_and_parse( page_options: Optional[PageOptions] = None, cropper: bool = False, endpoint: Optional[Endpoint] = None, - initial_delay_sec: float = 4, - delay_sec: float = 2, + initial_delay_sec: float = 2, + delay_sec: float = 1.5, max_retries: int = 30, full_text: bool = False, ) -> AsyncPredictResponse: @@ -283,9 +283,9 @@ def enqueue_and_parse( :param endpoint: For custom endpoints, an endpoint has to be given. - :param initial_delay_sec: Delay between each polling attempts This should not be shorter than 4 seconds. + :param initial_delay_sec: Delay before the first polling attempt. This should not be shorter than 1 second. - :param delay_sec: Delay between each polling attempts This should not be shorter than 2 seconds. + :param delay_sec: Delay between each polling attempt. This should not be shorter than 1 second. :param max_retries: Total amount of polling attempts. 
diff --git a/mindee/product/__init__.py b/mindee/product/__init__.py index 6c1e61db..bcb213bd 100644 --- a/mindee/product/__init__.py +++ b/mindee/product/__init__.py @@ -1,4 +1,4 @@ -from mindee.product import eu, fr, us +from mindee.product import eu, fr, ind, us from mindee.product.barcode_reader import BarcodeReaderV1, BarcodeReaderV1Document from mindee.product.bill_of_lading import ( BillOfLadingV1, @@ -9,8 +9,10 @@ BillOfLadingV1NotifyParty, BillOfLadingV1Shipper, ) +from mindee.product.business_card import BusinessCardV1, BusinessCardV1Document from mindee.product.cropper import CropperV1, CropperV1Document from mindee.product.custom import CustomV1, CustomV1Document, CustomV1Page +from mindee.product.delivery_note import DeliveryNoteV1, DeliveryNoteV1Document from mindee.product.financial_document import ( FinancialDocumentV1, FinancialDocumentV1Document, diff --git a/mindee/product/business_card/__init__.py b/mindee/product/business_card/__init__.py new file mode 100644 index 00000000..bf544a23 --- /dev/null +++ b/mindee/product/business_card/__init__.py @@ -0,0 +1,4 @@ +from mindee.product.business_card.business_card_v1 import BusinessCardV1 +from mindee.product.business_card.business_card_v1_document import ( + BusinessCardV1Document, +) diff --git a/mindee/product/business_card/business_card_v1.py b/mindee/product/business_card/business_card_v1.py new file mode 100644 index 00000000..297e1d26 --- /dev/null +++ b/mindee/product/business_card/business_card_v1.py @@ -0,0 +1,37 @@ +from typing import List + +from mindee.parsing.common import Inference, Page, StringDict +from mindee.product.business_card.business_card_v1_document import ( + BusinessCardV1Document, +) + + +class BusinessCardV1(Inference): + """Business Card API version 1 inference prediction.""" + + prediction: BusinessCardV1Document + """Document-level prediction.""" + pages: List[Page[BusinessCardV1Document]] + """Page-level prediction(s).""" + endpoint_name = "business_card" + """Name of 
the endpoint.""" + endpoint_version = "1" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: StringDict): + """ + Business Card v1 inference. + + :param raw_prediction: Raw prediction from the HTTP response. + """ + super().__init__(raw_prediction) + + self.prediction = BusinessCardV1Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(BusinessCardV1Document, page)) diff --git a/mindee/product/business_card/business_card_v1_document.py b/mindee/product/business_card/business_card_v1_document.py new file mode 100644 index 00000000..c9831425 --- /dev/null +++ b/mindee/product/business_card/business_card_v1_document.py @@ -0,0 +1,105 @@ +from typing import List, Optional + +from mindee.parsing.common import Prediction, StringDict, clean_out_string +from mindee.parsing.standard import StringField + + +class BusinessCardV1Document(Prediction): + """Business Card API version 1.0 document data.""" + + address: StringField + """The address of the person.""" + company: StringField + """The company the person works for.""" + email: StringField + """The email address of the person.""" + fax_number: StringField + """The Fax number of the person.""" + firstname: StringField + """The given name of the person.""" + job_title: StringField + """The job title of the person.""" + lastname: StringField + """The lastname of the person.""" + mobile_number: StringField + """The mobile number of the person.""" + phone_number: StringField + """The phone number of the person.""" + social_media: List[StringField] + """The social media profiles of the person or company.""" + website: StringField + """The website of the person or company.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Business Card document. 
+ + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.address = StringField( + raw_prediction["address"], + page_id=page_id, + ) + self.company = StringField( + raw_prediction["company"], + page_id=page_id, + ) + self.email = StringField( + raw_prediction["email"], + page_id=page_id, + ) + self.fax_number = StringField( + raw_prediction["fax_number"], + page_id=page_id, + ) + self.firstname = StringField( + raw_prediction["firstname"], + page_id=page_id, + ) + self.job_title = StringField( + raw_prediction["job_title"], + page_id=page_id, + ) + self.lastname = StringField( + raw_prediction["lastname"], + page_id=page_id, + ) + self.mobile_number = StringField( + raw_prediction["mobile_number"], + page_id=page_id, + ) + self.phone_number = StringField( + raw_prediction["phone_number"], + page_id=page_id, + ) + self.social_media = [ + StringField(prediction, page_id=page_id) + for prediction in raw_prediction["social_media"] + ] + self.website = StringField( + raw_prediction["website"], + page_id=page_id, + ) + + def __str__(self) -> str: + social_media = f"\n { ' ' * 14 }".join( + [str(item) for item in self.social_media], + ) + out_str: str = f":Firstname: {self.firstname}\n" + out_str += f":Lastname: {self.lastname}\n" + out_str += f":Job Title: {self.job_title}\n" + out_str += f":Company: {self.company}\n" + out_str += f":Email: {self.email}\n" + out_str += f":Phone Number: {self.phone_number}\n" + out_str += f":Mobile Number: {self.mobile_number}\n" + out_str += f":Fax Number: {self.fax_number}\n" + out_str += f":Address: {self.address}\n" + out_str += f":Website: {self.website}\n" + out_str += f":Social Media: {social_media}\n" + return clean_out_string(out_str) diff --git a/mindee/product/delivery_note/__init__.py b/mindee/product/delivery_note/__init__.py new file mode 100644 index 00000000..6de4eb38 --- /dev/null +++ 
b/mindee/product/delivery_note/__init__.py @@ -0,0 +1,4 @@ +from mindee.product.delivery_note.delivery_note_v1 import DeliveryNoteV1 +from mindee.product.delivery_note.delivery_note_v1_document import ( + DeliveryNoteV1Document, +) diff --git a/mindee/product/delivery_note/delivery_note_v1.py b/mindee/product/delivery_note/delivery_note_v1.py new file mode 100644 index 00000000..6364eb7d --- /dev/null +++ b/mindee/product/delivery_note/delivery_note_v1.py @@ -0,0 +1,37 @@ +from typing import List + +from mindee.parsing.common import Inference, Page, StringDict +from mindee.product.delivery_note.delivery_note_v1_document import ( + DeliveryNoteV1Document, +) + + +class DeliveryNoteV1(Inference): + """Delivery note API version 1 inference prediction.""" + + prediction: DeliveryNoteV1Document + """Document-level prediction.""" + pages: List[Page[DeliveryNoteV1Document]] + """Page-level prediction(s).""" + endpoint_name = "delivery_notes" + """Name of the endpoint.""" + endpoint_version = "1" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: StringDict): + """ + Delivery note v1 inference. + + :param raw_prediction: Raw prediction from the HTTP response. 
+ """ + super().__init__(raw_prediction) + + self.prediction = DeliveryNoteV1Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(DeliveryNoteV1Document, page)) diff --git a/mindee/product/delivery_note/delivery_note_v1_document.py b/mindee/product/delivery_note/delivery_note_v1_document.py new file mode 100644 index 00000000..0e09922f --- /dev/null +++ b/mindee/product/delivery_note/delivery_note_v1_document.py @@ -0,0 +1,74 @@ +from typing import Optional + +from mindee.parsing.common import Prediction, StringDict, clean_out_string +from mindee.parsing.standard import AmountField, DateField, StringField + + +class DeliveryNoteV1Document(Prediction): + """Delivery note API version 1.1 document data.""" + + customer_address: StringField + """The address of the customer receiving the goods.""" + customer_name: StringField + """The name of the customer receiving the goods.""" + delivery_date: DateField + """The date on which the delivery is scheduled to arrive.""" + delivery_number: StringField + """A unique identifier for the delivery note.""" + supplier_address: StringField + """The address of the supplier providing the goods.""" + supplier_name: StringField + """The name of the supplier providing the goods.""" + total_amount: AmountField + """The total monetary value of the goods being delivered.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Delivery note document. 
+ + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.customer_address = StringField( + raw_prediction["customer_address"], + page_id=page_id, + ) + self.customer_name = StringField( + raw_prediction["customer_name"], + page_id=page_id, + ) + self.delivery_date = DateField( + raw_prediction["delivery_date"], + page_id=page_id, + ) + self.delivery_number = StringField( + raw_prediction["delivery_number"], + page_id=page_id, + ) + self.supplier_address = StringField( + raw_prediction["supplier_address"], + page_id=page_id, + ) + self.supplier_name = StringField( + raw_prediction["supplier_name"], + page_id=page_id, + ) + self.total_amount = AmountField( + raw_prediction["total_amount"], + page_id=page_id, + ) + + def __str__(self) -> str: + out_str: str = f":Delivery Date: {self.delivery_date}\n" + out_str += f":Delivery Number: {self.delivery_number}\n" + out_str += f":Supplier Name: {self.supplier_name}\n" + out_str += f":Supplier Address: {self.supplier_address}\n" + out_str += f":Customer Name: {self.customer_name}\n" + out_str += f":Customer Address: {self.customer_address}\n" + out_str += f":Total Amount: {self.total_amount}\n" + return clean_out_string(out_str) diff --git a/mindee/product/financial_document/financial_document_v1_document.py b/mindee/product/financial_document/financial_document_v1_document.py index a95884f4..03eb16d5 100644 --- a/mindee/product/financial_document/financial_document_v1_document.py +++ b/mindee/product/financial_document/financial_document_v1_document.py @@ -17,7 +17,7 @@ class FinancialDocumentV1Document(Prediction): - """Financial Document API version 1.10 document data.""" + """Financial Document API version 1.11 document data.""" billing_address: StringField """The customer's address used for billing.""" diff --git a/mindee/product/ind/__init__.py b/mindee/product/ind/__init__.py new file mode 100644 index 
00000000..26a05e47 --- /dev/null +++ b/mindee/product/ind/__init__.py @@ -0,0 +1,4 @@ +from mindee.product.ind.indian_passport.indian_passport_v1 import IndianPassportV1 +from mindee.product.ind.indian_passport.indian_passport_v1_document import ( + IndianPassportV1Document, +) diff --git a/mindee/product/ind/indian_passport/__init__.py b/mindee/product/ind/indian_passport/__init__.py new file mode 100644 index 00000000..26a05e47 --- /dev/null +++ b/mindee/product/ind/indian_passport/__init__.py @@ -0,0 +1,4 @@ +from mindee.product.ind.indian_passport.indian_passport_v1 import IndianPassportV1 +from mindee.product.ind.indian_passport.indian_passport_v1_document import ( + IndianPassportV1Document, +) diff --git a/mindee/product/ind/indian_passport/indian_passport_v1.py b/mindee/product/ind/indian_passport/indian_passport_v1.py new file mode 100644 index 00000000..a1504e63 --- /dev/null +++ b/mindee/product/ind/indian_passport/indian_passport_v1.py @@ -0,0 +1,37 @@ +from typing import List + +from mindee.parsing.common import Inference, Page, StringDict +from mindee.product.ind.indian_passport.indian_passport_v1_document import ( + IndianPassportV1Document, +) + + +class IndianPassportV1(Inference): + """Passport - India API version 1 inference prediction.""" + + prediction: IndianPassportV1Document + """Document-level prediction.""" + pages: List[Page[IndianPassportV1Document]] + """Page-level prediction(s).""" + endpoint_name = "ind_passport" + """Name of the endpoint.""" + endpoint_version = "1" + """Version of the endpoint.""" + + def __init__(self, raw_prediction: StringDict): + """ + Passport - India v1 inference. + + :param raw_prediction: Raw prediction from the HTTP response. 
+ """ + super().__init__(raw_prediction) + + self.prediction = IndianPassportV1Document(raw_prediction["prediction"]) + self.pages = [] + for page in raw_prediction["pages"]: + try: + page_prediction = page["prediction"] + except KeyError: + continue + if page_prediction: + self.pages.append(Page(IndianPassportV1Document, page)) diff --git a/mindee/product/ind/indian_passport/indian_passport_v1_document.py b/mindee/product/ind/indian_passport/indian_passport_v1_document.py new file mode 100644 index 00000000..e1772135 --- /dev/null +++ b/mindee/product/ind/indian_passport/indian_passport_v1_document.py @@ -0,0 +1,186 @@ +from typing import Optional + +from mindee.parsing.common import Prediction, StringDict, clean_out_string +from mindee.parsing.standard import ClassificationField, DateField, StringField + + +class IndianPassportV1Document(Prediction): + """Passport - India API version 1.0 document data.""" + + address1: StringField + """The first line of the address of the passport holder.""" + address2: StringField + """The second line of the address of the passport holder.""" + address3: StringField + """The third line of the address of the passport holder.""" + birth_date: DateField + """The birth date of the passport holder, ISO format: YYYY-MM-DD.""" + birth_place: StringField + """The birth place of the passport holder.""" + country: StringField + """ISO 3166-1 alpha-3 country code (3 letters format).""" + expiry_date: DateField + """The date when the passport will expire, ISO format: YYYY-MM-DD.""" + file_number: StringField + """The file number of the passport document.""" + gender: ClassificationField + """The gender of the passport holder.""" + given_names: StringField + """The given names of the passport holder.""" + id_number: StringField + """The identification number of the passport document.""" + issuance_date: DateField + """The date when the passport was issued, ISO format: YYYY-MM-DD.""" + issuance_place: StringField + """The place where the 
passport was issued.""" + legal_guardian: StringField + """The name of the legal guardian of the passport holder (if applicable).""" + mrz1: StringField + """The first line of the machine-readable zone (MRZ) of the passport document.""" + mrz2: StringField + """The second line of the machine-readable zone (MRZ) of the passport document.""" + name_of_mother: StringField + """The name of the mother of the passport holder.""" + name_of_spouse: StringField + """The name of the spouse of the passport holder (if applicable).""" + old_passport_date_of_issue: DateField + """The date of issue of the old passport (if applicable), ISO format: YYYY-MM-DD.""" + old_passport_number: StringField + """The number of the old passport (if applicable).""" + old_passport_place_of_issue: StringField + """The place of issue of the old passport (if applicable).""" + page_number: ClassificationField + """The page number of the passport document.""" + surname: StringField + """The surname of the passport holder.""" + + def __init__( + self, + raw_prediction: StringDict, + page_id: Optional[int] = None, + ): + """ + Passport - India document. 
+ + :param raw_prediction: Raw prediction from HTTP response + :param page_id: Page number for multi pages pdf input + """ + super().__init__(raw_prediction, page_id) + self.address1 = StringField( + raw_prediction["address1"], + page_id=page_id, + ) + self.address2 = StringField( + raw_prediction["address2"], + page_id=page_id, + ) + self.address3 = StringField( + raw_prediction["address3"], + page_id=page_id, + ) + self.birth_date = DateField( + raw_prediction["birth_date"], + page_id=page_id, + ) + self.birth_place = StringField( + raw_prediction["birth_place"], + page_id=page_id, + ) + self.country = StringField( + raw_prediction["country"], + page_id=page_id, + ) + self.expiry_date = DateField( + raw_prediction["expiry_date"], + page_id=page_id, + ) + self.file_number = StringField( + raw_prediction["file_number"], + page_id=page_id, + ) + self.gender = ClassificationField( + raw_prediction["gender"], + page_id=page_id, + ) + self.given_names = StringField( + raw_prediction["given_names"], + page_id=page_id, + ) + self.id_number = StringField( + raw_prediction["id_number"], + page_id=page_id, + ) + self.issuance_date = DateField( + raw_prediction["issuance_date"], + page_id=page_id, + ) + self.issuance_place = StringField( + raw_prediction["issuance_place"], + page_id=page_id, + ) + self.legal_guardian = StringField( + raw_prediction["legal_guardian"], + page_id=page_id, + ) + self.mrz1 = StringField( + raw_prediction["mrz1"], + page_id=page_id, + ) + self.mrz2 = StringField( + raw_prediction["mrz2"], + page_id=page_id, + ) + self.name_of_mother = StringField( + raw_prediction["name_of_mother"], + page_id=page_id, + ) + self.name_of_spouse = StringField( + raw_prediction["name_of_spouse"], + page_id=page_id, + ) + self.old_passport_date_of_issue = DateField( + raw_prediction["old_passport_date_of_issue"], + page_id=page_id, + ) + self.old_passport_number = StringField( + raw_prediction["old_passport_number"], + page_id=page_id, + ) + 
self.old_passport_place_of_issue = StringField( + raw_prediction["old_passport_place_of_issue"], + page_id=page_id, + ) + self.page_number = ClassificationField( + raw_prediction["page_number"], + page_id=page_id, + ) + self.surname = StringField( + raw_prediction["surname"], + page_id=page_id, + ) + + def __str__(self) -> str: + out_str: str = f":Page Number: {self.page_number}\n" + out_str += f":Country: {self.country}\n" + out_str += f":ID Number: {self.id_number}\n" + out_str += f":Given Names: {self.given_names}\n" + out_str += f":Surname: {self.surname}\n" + out_str += f":Birth Date: {self.birth_date}\n" + out_str += f":Birth Place: {self.birth_place}\n" + out_str += f":Issuance Place: {self.issuance_place}\n" + out_str += f":Gender: {self.gender}\n" + out_str += f":Issuance Date: {self.issuance_date}\n" + out_str += f":Expiry Date: {self.expiry_date}\n" + out_str += f":MRZ Line 1: {self.mrz1}\n" + out_str += f":MRZ Line 2: {self.mrz2}\n" + out_str += f":Legal Guardian: {self.legal_guardian}\n" + out_str += f":Name of Spouse: {self.name_of_spouse}\n" + out_str += f":Name of Mother: {self.name_of_mother}\n" + out_str += f":Old Passport Date of Issue: {self.old_passport_date_of_issue}\n" + out_str += f":Old Passport Number: {self.old_passport_number}\n" + out_str += f":Address Line 1: {self.address1}\n" + out_str += f":Address Line 2: {self.address2}\n" + out_str += f":Address Line 3: {self.address3}\n" + out_str += f":Old Passport Place of Issue: {self.old_passport_place_of_issue}\n" + out_str += f":File Number: {self.file_number}\n" + return clean_out_string(out_str) diff --git a/mindee/product/invoice/invoice_v4_document.py b/mindee/product/invoice/invoice_v4_document.py index 6f31ee1b..8bb30ea7 100644 --- a/mindee/product/invoice/invoice_v4_document.py +++ b/mindee/product/invoice/invoice_v4_document.py @@ -15,7 +15,7 @@ class InvoiceV4Document(Prediction): - """Invoice API version 4.8 document data.""" + """Invoice API version 4.9 document data.""" 
billing_address: StringField """The customer's address used for billing.""" diff --git a/mindee/product/resume/resume_v1_document.py b/mindee/product/resume/resume_v1_document.py index 35e8ecfc..00c7ce56 100644 --- a/mindee/product/resume/resume_v1_document.py +++ b/mindee/product/resume/resume_v1_document.py @@ -14,7 +14,7 @@ class ResumeV1Document(Prediction): - """Resume API version 1.0 document data.""" + """Resume API version 1.1 document data.""" address: StringField """The location information of the candidate, including city, state, and country.""" @@ -216,6 +216,7 @@ def _professional_experiences_separator(char: str) -> str: out_str = " " out_str += f"+{char * 17}" out_str += f"+{char * 12}" + out_str += f"+{char * 38}" out_str += f"+{char * 27}" out_str += f"+{char * 11}" out_str += f"+{char * 10}" @@ -235,6 +236,7 @@ def _professional_experiences_to_str(self) -> str: out_str += f"\n{self._professional_experiences_separator('-')}\n " out_str += " | Contract Type " out_str += " | Department" + out_str += " | Description " out_str += " | Employer " out_str += " | End Month" out_str += " | End Year" diff --git a/mindee/product/resume/resume_v1_professional_experience.py b/mindee/product/resume/resume_v1_professional_experience.py index 358c9ecd..b3d42605 100644 --- a/mindee/product/resume/resume_v1_professional_experience.py +++ b/mindee/product/resume/resume_v1_professional_experience.py @@ -11,6 +11,8 @@ class ResumeV1ProfessionalExperience(FieldPositionMixin, FieldConfidenceMixin): """The type of contract for the professional experience.""" department: Optional[str] """The specific department or division within the company.""" + description: Optional[str] + """The description of the professional experience as written in the document.""" employer: Optional[str] """The name of the company or organization.""" end_month: Optional[str] @@ -44,6 +46,7 @@ def __init__( self.contract_type = raw_prediction["contract_type"] self.department = 
raw_prediction["department"] + self.description = raw_prediction["description"] self.employer = raw_prediction["employer"] self.end_month = raw_prediction["end_month"] self.end_year = raw_prediction["end_year"] @@ -56,6 +59,7 @@ def _printable_values(self) -> Dict[str, str]: out_dict: Dict[str, str] = {} out_dict["contract_type"] = format_for_display(self.contract_type) out_dict["department"] = format_for_display(self.department) + out_dict["description"] = format_for_display(self.description) out_dict["employer"] = format_for_display(self.employer) out_dict["end_month"] = format_for_display(self.end_month) out_dict["end_year"] = format_for_display(self.end_year) @@ -69,6 +73,7 @@ def _table_printable_values(self) -> Dict[str, str]: out_dict: Dict[str, str] = {} out_dict["contract_type"] = format_for_display(self.contract_type, 15) out_dict["department"] = format_for_display(self.department, 10) + out_dict["description"] = format_for_display(self.description, 36) out_dict["employer"] = format_for_display(self.employer, 25) out_dict["end_month"] = format_for_display(self.end_month, None) out_dict["end_year"] = format_for_display(self.end_year, None) @@ -82,6 +87,7 @@ def to_table_line(self) -> str: printable = self._table_printable_values() out_str: str = f"| {printable['contract_type']:<15} | " out_str += f"{printable['department']:<10} | " + out_str += f"{printable['description']:<36} | " out_str += f"{printable['employer']:<25} | " out_str += f"{printable['end_month']:<9} | " out_str += f"{printable['end_year']:<8} | " @@ -95,6 +101,7 @@ def __str__(self) -> str: printable = self._printable_values() out_str: str = f"Contract Type: {printable['contract_type']}, \n" out_str += f"Department: {printable['department']}, \n" + out_str += f"Description: {printable['description']}, \n" out_str += f"Employer: {printable['employer']}, \n" out_str += f"End Month: {printable['end_month']}, \n" out_str += f"End Year: {printable['end_year']}, \n" diff --git a/tests/data 
b/tests/data index 2198cbaf..96f73126 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 2198cbafa34412f56e677494ecbc25ed79f0af04 +Subproject commit 96f73126330ffffe6911d03b8c7fc13b8d301dfe diff --git a/tests/product/business_card/__init__.py b/tests/product/business_card/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/product/business_card/test_business_card_v1.py b/tests/product/business_card/test_business_card_v1.py new file mode 100644 index 00000000..3956adeb --- /dev/null +++ b/tests/product/business_card/test_business_card_v1.py @@ -0,0 +1,56 @@ +import json + +import pytest + +from mindee.parsing.common.document import Document +from mindee.parsing.common.page import Page +from mindee.product import BusinessCardV1 +from mindee.product.business_card.business_card_v1_document import ( + BusinessCardV1Document, +) +from tests.product import PRODUCT_DATA_DIR + +RESPONSE_DIR = PRODUCT_DATA_DIR / "business_card" / "response_v1" + +BusinessCardV1DocumentType = Document[ + BusinessCardV1Document, + Page[BusinessCardV1Document], +] + + +@pytest.fixture +def complete_doc() -> BusinessCardV1DocumentType: + file_path = RESPONSE_DIR / "complete.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return Document(BusinessCardV1, json_data["document"]) + + +@pytest.fixture +def empty_doc() -> BusinessCardV1DocumentType: + file_path = RESPONSE_DIR / "empty.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return Document(BusinessCardV1, json_data["document"]) + + +def test_complete_doc(complete_doc: BusinessCardV1DocumentType): + file_path = RESPONSE_DIR / "summary_full.rst" + with open(file_path, "r", encoding="utf-8") as open_file: + reference_str = open_file.read() + assert str(complete_doc) == reference_str + + +def test_empty_doc(empty_doc: BusinessCardV1DocumentType): + prediction = empty_doc.inference.prediction + 
assert prediction.firstname.value is None + assert prediction.lastname.value is None + assert prediction.job_title.value is None + assert prediction.company.value is None + assert prediction.email.value is None + assert prediction.phone_number.value is None + assert prediction.mobile_number.value is None + assert prediction.fax_number.value is None + assert prediction.address.value is None + assert prediction.website.value is None + assert len(prediction.social_media) == 0 diff --git a/tests/product/delivery_note/__init__.py b/tests/product/delivery_note/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/product/delivery_note/test_delivery_note_v1.py b/tests/product/delivery_note/test_delivery_note_v1.py new file mode 100644 index 00000000..324cc48f --- /dev/null +++ b/tests/product/delivery_note/test_delivery_note_v1.py @@ -0,0 +1,52 @@ +import json + +import pytest + +from mindee.parsing.common.document import Document +from mindee.parsing.common.page import Page +from mindee.product import DeliveryNoteV1 +from mindee.product.delivery_note.delivery_note_v1_document import ( + DeliveryNoteV1Document, +) +from tests.product import PRODUCT_DATA_DIR + +RESPONSE_DIR = PRODUCT_DATA_DIR / "delivery_notes" / "response_v1" + +DeliveryNoteV1DocumentType = Document[ + DeliveryNoteV1Document, + Page[DeliveryNoteV1Document], +] + + +@pytest.fixture +def complete_doc() -> DeliveryNoteV1DocumentType: + file_path = RESPONSE_DIR / "complete.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return Document(DeliveryNoteV1, json_data["document"]) + + +@pytest.fixture +def empty_doc() -> DeliveryNoteV1DocumentType: + file_path = RESPONSE_DIR / "empty.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return Document(DeliveryNoteV1, json_data["document"]) + + +def test_complete_doc(complete_doc: DeliveryNoteV1DocumentType): + file_path = RESPONSE_DIR / 
"summary_full.rst" + with open(file_path, "r", encoding="utf-8") as open_file: + reference_str = open_file.read() + assert str(complete_doc) == reference_str + + +def test_empty_doc(empty_doc: DeliveryNoteV1DocumentType): + prediction = empty_doc.inference.prediction + assert prediction.delivery_date.value is None + assert prediction.delivery_number.value is None + assert prediction.supplier_name.value is None + assert prediction.supplier_address.value is None + assert prediction.customer_name.value is None + assert prediction.customer_address.value is None + assert prediction.total_amount.value is None diff --git a/tests/product/ind/indian_passport/__init__.py b/tests/product/ind/indian_passport/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/product/ind/indian_passport/test_indian_passport_v1.py b/tests/product/ind/indian_passport/test_indian_passport_v1.py new file mode 100644 index 00000000..477a09eb --- /dev/null +++ b/tests/product/ind/indian_passport/test_indian_passport_v1.py @@ -0,0 +1,66 @@ +import json + +import pytest + +from mindee.parsing.common.document import Document +from mindee.parsing.common.page import Page +from mindee.product.ind import IndianPassportV1 +from mindee.product.ind.indian_passport.indian_passport_v1_document import ( + IndianPassportV1Document, +) +from tests.product import PRODUCT_DATA_DIR + +RESPONSE_DIR = PRODUCT_DATA_DIR / "ind_passport" / "response_v1" + +IndianPassportV1DocumentType = Document[ + IndianPassportV1Document, + Page[IndianPassportV1Document], +] + + +@pytest.fixture +def complete_doc() -> IndianPassportV1DocumentType: + file_path = RESPONSE_DIR / "complete.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return Document(IndianPassportV1, json_data["document"]) + + +@pytest.fixture +def empty_doc() -> IndianPassportV1DocumentType: + file_path = RESPONSE_DIR / "empty.json" + with open(file_path, "r", encoding="utf-8") as open_file: 
+ json_data = json.load(open_file) + return Document(IndianPassportV1, json_data["document"]) + + +def test_complete_doc(complete_doc: IndianPassportV1DocumentType): + file_path = RESPONSE_DIR / "summary_full.rst" + with open(file_path, "r", encoding="utf-8") as open_file: + reference_str = open_file.read() + assert str(complete_doc) == reference_str + + +def test_empty_doc(empty_doc: IndianPassportV1DocumentType): + prediction = empty_doc.inference.prediction + assert prediction.country.value is None + assert prediction.id_number.value is None + assert prediction.given_names.value is None + assert prediction.surname.value is None + assert prediction.birth_date.value is None + assert prediction.birth_place.value is None + assert prediction.issuance_place.value is None + assert prediction.issuance_date.value is None + assert prediction.expiry_date.value is None + assert prediction.mrz1.value is None + assert prediction.mrz2.value is None + assert prediction.legal_guardian.value is None + assert prediction.name_of_spouse.value is None + assert prediction.name_of_mother.value is None + assert prediction.old_passport_date_of_issue.value is None + assert prediction.old_passport_number.value is None + assert prediction.address1.value is None + assert prediction.address2.value is None + assert prediction.address3.value is None + assert prediction.old_passport_place_of_issue.value is None + assert prediction.file_number.value is None