Skip to content

Commit 4de8028

Browse files
authored Feb 1, 2023
♻️ rework financial document so as not to cause breaking changes (#121)
·
v4.25.0 v3.4.0
1 parent 30bebd9 commit 4de8028

File tree

12 files changed

+681
-671
lines changed

12 files changed

+681
-671
lines changed
 

‎mindee/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ class CommandConfig(Generic[TypeDoc]):
3434
help="Passport",
3535
doc_class=documents.TypePassportV1,
3636
),
37-
"financial": CommandConfig(
37+
"financial-document": CommandConfig(
3838
help="Financial Document (receipt or invoice)",
39-
doc_class=documents.TypeFinancialV1,
39+
doc_class=documents.TypeFinancialDocumentV1,
4040
),
4141
"proof-of-address": CommandConfig(
4242
help="Proof of Address",

‎mindee/client.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from mindee.documents import (
55
CropperV1,
66
CustomV1,
7-
FinancialV0,
7+
FinancialDocumentV1,
88
FinancialV1,
99
InvoiceV3,
1010
InvoiceV4,
@@ -230,9 +230,9 @@ def _init_default_endpoints(self) -> None:
230230
)
231231
],
232232
),
233-
(OTS_OWNER, FinancialV0.__name__): DocumentConfig(
233+
(OTS_OWNER, FinancialV1.__name__): DocumentConfig(
234234
document_type="financial_doc",
235-
document_class=FinancialV0,
235+
document_class=FinancialV1,
236236
endpoints=[
237237
StandardEndpoint(
238238
url_name="invoices", version="3", api_key=self.api_key
@@ -242,9 +242,9 @@ def _init_default_endpoints(self) -> None:
242242
),
243243
],
244244
),
245-
(OTS_OWNER, FinancialV1.__name__): DocumentConfig(
246-
document_type="financial_doc",
247-
document_class=FinancialV1,
245+
(OTS_OWNER, FinancialDocumentV1.__name__): DocumentConfig(
246+
document_type="financial_document",
247+
document_class=FinancialDocumentV1,
248248
endpoints=[
249249
StandardEndpoint(
250250
url_name="financial_document", version="1", api_key=self.api_key

‎mindee/documents/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
from mindee.documents.cropper import CropperV1, TypeCropperV1
33
from mindee.documents.custom import CustomV1, TypeCustomV1
44
from mindee.documents.financial import (
5-
FinancialV0,
5+
FinancialDocumentV1,
66
FinancialV1,
7-
TypeFinancialV0,
7+
TypeFinancialDocumentV1,
88
TypeFinancialV1,
99
)
1010
from mindee.documents.invoice import InvoiceV3, InvoiceV4, TypeInvoiceV3, TypeInvoiceV4
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from .financial_v0 import FinancialV0, TypeFinancialV0
1+
from .financial_document_v1 import FinancialDocumentV1, TypeFinancialDocumentV1
22
from .financial_v1 import FinancialV1, TypeFinancialV1
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
from typing import List, Optional, TypeVar
2+
3+
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
4+
from mindee.documents.invoice.line_item import InvoiceLineItem
5+
from mindee.fields.amount import AmountField
6+
from mindee.fields.company_registration import CompanyRegistrationField
7+
from mindee.fields.date import DateField
8+
from mindee.fields.locale import LocaleField
9+
from mindee.fields.payment_details import PaymentDetails
10+
from mindee.fields.tax import TaxField
11+
from mindee.fields.text import TextField
12+
13+
14+
class FinancialDocumentV1(Document):
15+
locale: LocaleField
16+
"""locale information"""
17+
total_amount: AmountField
18+
"""Total including taxes"""
19+
total_net: AmountField
20+
"""Total excluding taxes"""
21+
date: DateField
22+
"""Date the invoice was issued"""
23+
invoice_number: TextField
24+
"""Invoice number"""
25+
reference_numbers: List[TextField]
26+
"""List of Reference numbers including PO number."""
27+
due_date: DateField
28+
"""Date the invoice is due"""
29+
taxes: List[TaxField] = []
30+
"""List of all taxes"""
31+
total_tax: AmountField
32+
"""Sum total of all taxes"""
33+
supplier_name: TextField
34+
"""Supplier's name"""
35+
supplier_address: TextField
36+
"""Supplier's address"""
37+
supplier_company_registrations: List[CompanyRegistrationField]
38+
"""Company numbers"""
39+
customer_name: TextField
40+
"""Customer's name"""
41+
customer_address: TextField
42+
"""Customer's address"""
43+
customer_company_registrations: List[CompanyRegistrationField]
44+
"""Customer company registration numbers"""
45+
supplier_payment_details: List[PaymentDetails]
46+
"""Payment details"""
47+
line_items: List[InvoiceLineItem]
48+
"""Details of line items"""
49+
tip: AmountField
50+
"""Total amount of tip and gratuity."""
51+
time: TextField
52+
"""Time as seen on the receipt in HH:MM format."""
53+
document_type: TextField
54+
"""A classification field, among predefined classes."""
55+
category: TextField
56+
"""The invoice or receipt category among predefined classes."""
57+
subcategory: TextField
58+
"""The invoice or receipt sub-category among predefined classes."""
59+
60+
def __init__(
61+
self,
62+
api_prediction=None,
63+
input_source=None,
64+
page_n: Optional[int] = None,
65+
document_type="financial_doc",
66+
):
67+
"""
68+
Union of `Invoice` and `Receipt`.
69+
70+
:param api_prediction: Raw prediction from HTTP response
71+
:param input_source: Input object
72+
:param page_n: Page number for multi-page PDF input
73+
"""
74+
# need this for building from prediction
75+
self.input_file = input_source
76+
77+
super().__init__(
78+
input_source=input_source,
79+
document_type=document_type,
80+
api_prediction=api_prediction,
81+
page_n=page_n,
82+
)
83+
self._build_from_api_prediction(api_prediction["prediction"], page_n=page_n)
84+
85+
def _build_from_api_prediction(
86+
self, api_prediction: TypeApiPrediction, page_n: Optional[int] = None
87+
) -> None:
88+
"""
89+
Build the document from an API response JSON.
90+
91+
:param api_prediction: Raw prediction from HTTP response
92+
:param page_n: Page number for multi pages pdf input
93+
"""
94+
self.supplier_company_registrations = [
95+
CompanyRegistrationField(field_dict, page_n=page_n)
96+
for field_dict in api_prediction["supplier_company_registrations"]
97+
]
98+
self.date = DateField(api_prediction["date"], page_n=page_n)
99+
self.due_date = DateField(api_prediction["due_date"], page_n=page_n)
100+
self.invoice_number = TextField(api_prediction["invoice_number"], page_n=page_n)
101+
self.reference_numbers = [
102+
TextField(reference_number, page_n=page_n)
103+
for reference_number in api_prediction["reference_numbers"]
104+
]
105+
self.locale = LocaleField(
106+
api_prediction["locale"], value_key="language", page_n=page_n
107+
)
108+
self.supplier_name = TextField(api_prediction["supplier_name"], page_n=page_n)
109+
self.supplier_address = TextField(
110+
api_prediction["supplier_address"], page_n=page_n
111+
)
112+
self.customer_name = TextField(api_prediction["customer_name"], page_n=page_n)
113+
self.customer_company_registrations = [
114+
CompanyRegistrationField(field_dict, page_n=page_n)
115+
for field_dict in api_prediction["customer_company_registrations"]
116+
]
117+
self.customer_address = TextField(
118+
api_prediction["customer_address"], page_n=page_n
119+
)
120+
121+
self.taxes = [
122+
TaxField(tax_prediction, page_n=page_n, value_key="value")
123+
for tax_prediction in api_prediction["taxes"]
124+
]
125+
self.supplier_payment_details = [
126+
PaymentDetails(payment_detail, page_n=page_n)
127+
for payment_detail in api_prediction["supplier_payment_details"]
128+
]
129+
self.line_items = [
130+
InvoiceLineItem(prediction=line_item, page_n=page_n)
131+
for line_item in api_prediction["line_items"]
132+
]
133+
self.total_amount = AmountField(api_prediction["total_amount"], page_n=page_n)
134+
self.total_net = AmountField(api_prediction["total_net"], page_n=page_n)
135+
self.total_tax = AmountField(api_prediction["total_tax"], page_n=page_n)
136+
self.tip = AmountField(api_prediction["tip"], page_n=page_n)
137+
self.time = TextField(api_prediction["time"], page_n=page_n)
138+
self.document_type = TextField(api_prediction["document_type"], page_n=page_n)
139+
self.category = TextField(api_prediction["category"], page_n=page_n)
140+
self.subcategory = TextField(api_prediction["subcategory"], page_n=page_n)
141+
142+
def __str__(self) -> str:
143+
supplier_company_registrations = "; ".join(
144+
[str(n.value) for n in self.supplier_company_registrations]
145+
)
146+
customer_company_registrations = "; ".join(
147+
[str(n.value) for n in self.customer_company_registrations]
148+
)
149+
reference_numbers = ", ".join([str(n.value) for n in self.reference_numbers])
150+
payment_details = "\n ".join(
151+
[str(p) for p in self.supplier_payment_details]
152+
)
153+
taxes = "\n ".join(f"{t}" for t in self.taxes)
154+
line_items = "\n"
155+
if self.line_items:
156+
line_items = "\n Code | QTY | Price | Amount | Tax (Rate) | Description\n"
157+
for item in self.line_items:
158+
line_items += f" {item}\n"
159+
return clean_out_string(
160+
"----- Financial Document V1 -----\n"
161+
f"Filename: {self.filename or ''}\n"
162+
f"Document type: {self.document_type}\n"
163+
f"Category: {self.category}\n"
164+
f"Subcategory: {self.subcategory}\n"
165+
f"Locale: {self.locale}\n"
166+
f"Invoice number: {self.invoice_number}\n"
167+
f"Reference numbers: {reference_numbers}\n"
168+
f"Date: {self.date}\n"
169+
f"Due date: {self.due_date}\n"
170+
f"Time: {self.time}\n"
171+
f"Supplier name: {self.supplier_name}\n"
172+
f"Supplier address: {self.supplier_address}\n"
173+
f"Supplier company registrations: {supplier_company_registrations}\n"
174+
f"Supplier payment details: {payment_details}\n"
175+
f"Customer name: {self.customer_name}\n"
176+
f"Customer address: {self.customer_address}\n"
177+
f"Customer company registrations: {customer_company_registrations}\n"
178+
f"Tip: {self.tip}\n"
179+
f"Taxes: {taxes}\n"
180+
f"Total tax: {self.total_tax}\n"
181+
f"Total net: {self.total_net}\n"
182+
f"Total amount: {self.total_amount}\n"
183+
f"Line Items: {line_items}"
184+
"----------------------"
185+
)
186+
187+
def _checklist(self) -> None:
188+
pass
189+
190+
191+
TypeFinancialDocumentV1 = TypeVar("TypeFinancialDocumentV1", bound=FinancialDocumentV1)

‎mindee/documents/financial/financial_v0.py

Lines changed: 0 additions & 219 deletions
This file was deleted.
Lines changed: 143 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,52 @@
11
from typing import List, Optional, TypeVar
22

33
from mindee.documents.base import Document, TypeApiPrediction, clean_out_string
4-
from mindee.documents.invoice.line_item import InvoiceLineItem
4+
from mindee.documents.invoice.invoice_v3 import InvoiceV3
5+
from mindee.documents.receipt.receipt_v3 import ReceiptV3
6+
from mindee.endpoints import Endpoint
57
from mindee.fields.amount import AmountField
68
from mindee.fields.company_registration import CompanyRegistrationField
79
from mindee.fields.date import DateField
810
from mindee.fields.locale import LocaleField
911
from mindee.fields.payment_details import PaymentDetails
1012
from mindee.fields.tax import TaxField
1113
from mindee.fields.text import TextField
14+
from mindee.input.sources import InputSource
1215

1316

1417
class FinancialV1(Document):
1518
locale: LocaleField
1619
"""locale information"""
17-
total_amount: AmountField
20+
total_incl: AmountField
1821
"""Total including taxes"""
19-
total_net: AmountField
22+
total_excl: AmountField
2023
"""Total excluding taxes"""
2124
date: DateField
22-
"""Date the invoice was issued"""
25+
"""Date the document was issued"""
26+
time: TextField
27+
"""Time the document was issued"""
2328
invoice_number: TextField
2429
"""Invoice number"""
25-
reference_numbers: List[TextField]
26-
"""List of Reference numbers including PO number."""
2730
due_date: DateField
2831
"""Date the invoice is due"""
29-
taxes: List[TaxField] = []
32+
taxes: List[TaxField]
3033
"""List of all taxes"""
31-
total_tax: AmountField
32-
"""Sum total of all taxes"""
33-
supplier_name: TextField
34-
"""Supplier 's name"""
34+
merchant_name: TextField
35+
"""Merchant/Supplier's name"""
3536
supplier_address: TextField
36-
"""Supplier's address"""
37-
supplier_company_registrations: List[CompanyRegistrationField]
38-
"""Company numbers"""
37+
"""Merchant/Supplier's address"""
3938
customer_name: TextField
4039
"""Customer's name"""
4140
customer_address: TextField
4241
"""Customer's address"""
43-
customer_company_registrations: List[CompanyRegistrationField]
42+
customer_company_registration: List[CompanyRegistrationField]
4443
"""Customer company registration numbers"""
45-
supplier_payment_details: List[PaymentDetails]
44+
payment_details: List[PaymentDetails]
4645
"""Payment details"""
47-
line_items: List[InvoiceLineItem]
48-
"""Details of line items"""
49-
tip: AmountField
50-
"""Total amount of tip and gratuity."""
51-
time: TextField
52-
"""Time as seen on the receipt in HH:MM format."""
53-
document_type: TextField
54-
"""A classification field, among predefined classes."""
55-
category: TextField
56-
"""The invoice or receipt category among predefined classes."""
57-
subcategory: TextField
58-
"""The invoice or receipt sub-category among predefined classes."""
46+
company_number: List[CompanyRegistrationField]
47+
"""Company numbers"""
48+
total_tax: AmountField
49+
"""Sum total of all taxes"""
5950

6051
def __init__(
6152
self,
@@ -67,6 +58,8 @@ def __init__(
6758
"""
6859
Union of `Invoice` and `Receipt`.
6960
61+
DEPRECATED, do not use.
62+
7063
:param api_prediction: Raw prediction from HTTP response
7164
:param input_source: Input object
7265
:param page_n: Page number for multi-page PDF input
@@ -80,7 +73,8 @@ def __init__(
8073
api_prediction=api_prediction,
8174
page_n=page_n,
8275
)
83-
self._build_from_api_prediction(api_prediction["prediction"], page_n=page_n)
76+
self._build_from_api_prediction(api_prediction, page_n=page_n)
77+
self._checklist()
8478

8579
def _build_from_api_prediction(
8680
self, api_prediction: TypeApiPrediction, page_n: Optional[int] = None
@@ -91,101 +85,137 @@ def _build_from_api_prediction(
9185
:param api_prediction: Raw prediction from HTTP response
9286
:param page_n: Page number for multi pages pdf input
9387
"""
94-
self.supplier_company_registrations = [
95-
CompanyRegistrationField(field_dict, page_n=page_n)
96-
for field_dict in api_prediction["supplier_company_registrations"]
97-
]
98-
self.date = DateField(api_prediction["date"], page_n=page_n)
99-
self.due_date = DateField(api_prediction["due_date"], page_n=page_n)
100-
self.invoice_number = TextField(api_prediction["invoice_number"], page_n=page_n)
101-
self.reference_numbers = [
102-
TextField(reference_number, page_n=page_n)
103-
for reference_number in api_prediction["reference_numbers"]
104-
]
105-
self.locale = LocaleField(
106-
api_prediction["locale"], value_key="language", page_n=page_n
107-
)
108-
self.supplier_name = TextField(api_prediction["supplier_name"], page_n=page_n)
109-
self.supplier_address = TextField(
110-
api_prediction["supplier_address"], page_n=page_n
111-
)
112-
self.customer_name = TextField(api_prediction["customer_name"], page_n=page_n)
113-
self.customer_company_registrations = [
114-
CompanyRegistrationField(field_dict, page_n=page_n)
115-
for field_dict in api_prediction["customer_company_registrations"]
116-
]
117-
self.customer_address = TextField(
118-
api_prediction["customer_address"], page_n=page_n
119-
)
120-
121-
self.taxes = [
122-
TaxField(tax_prediction, page_n=page_n, value_key="value")
123-
for tax_prediction in api_prediction["taxes"]
124-
]
125-
self.supplier_payment_details = [
126-
PaymentDetails(payment_detail, page_n=page_n)
127-
for payment_detail in api_prediction["supplier_payment_details"]
128-
]
129-
self.line_items = [
130-
InvoiceLineItem(prediction=line_item, page_n=page_n)
131-
for line_item in api_prediction["line_items"]
132-
]
133-
self.total_amount = AmountField(api_prediction["total_amount"], page_n=page_n)
134-
self.total_net = AmountField(api_prediction["total_net"], page_n=page_n)
135-
self.total_tax = AmountField(api_prediction["total_tax"], page_n=page_n)
136-
self.tip = AmountField(api_prediction["tip"], page_n=page_n)
137-
self.time = TextField(api_prediction["time"], page_n=page_n)
138-
self.document_type = TextField(api_prediction["document_type"], page_n=page_n)
139-
self.category = TextField(api_prediction["category"], page_n=page_n)
140-
self.subcategory = TextField(api_prediction["subcategory"], page_n=page_n)
88+
if "invoice_number" in api_prediction["prediction"].keys():
89+
invoice = InvoiceV3(api_prediction, self.input_file, page_n=page_n)
90+
self.locale = invoice.locale
91+
self.total_incl = invoice.total_incl
92+
self.total_excl = invoice.total_excl
93+
self.date = invoice.invoice_date
94+
self.invoice_number = invoice.invoice_number
95+
self.due_date = invoice.due_date
96+
self.taxes = invoice.taxes
97+
self.merchant_name = invoice.supplier
98+
self.payment_details = invoice.payment_details
99+
self.company_number = invoice.company_number
100+
self.orientation = invoice.orientation
101+
self.total_tax = invoice.total_tax
102+
self.time = TextField({"value": None, "confidence": 0.0})
103+
self.supplier_address = invoice.supplier_address
104+
self.customer_name = invoice.customer_name
105+
self.customer_company_registration = invoice.customer_company_registration
106+
self.customer_address = invoice.customer_address
107+
else:
108+
receipt = ReceiptV3(api_prediction, self.input_file, page_n=page_n)
109+
self.orientation = receipt.orientation
110+
self.date = receipt.date
111+
self.due_date = receipt.date
112+
self.taxes = receipt.taxes
113+
self.locale = receipt.locale
114+
self.total_incl = receipt.total_incl
115+
self.total_excl = receipt.total_excl
116+
self.merchant_name = receipt.merchant_name
117+
self.time = receipt.time
118+
self.total_tax = receipt.total_tax
119+
self.customer_company_registration = []
120+
self.company_number = []
121+
self.payment_details = []
122+
self.invoice_number = TextField({"value": None, "confidence": 0.0})
123+
self.supplier_address = TextField({"value": None, "confidence": 0.0})
124+
self.customer_name = TextField({"value": None, "confidence": 0.0})
125+
self.customer_address = TextField({"value": None, "confidence": 0.0})
141126

142127
def __str__(self) -> str:
143-
supplier_company_registrations = "; ".join(
144-
[str(n.value) for n in self.supplier_company_registrations]
145-
)
146-
customer_company_registrations = "; ".join(
147-
[str(n.value) for n in self.customer_company_registrations]
148-
)
149-
reference_numbers = ", ".join([str(n.value) for n in self.reference_numbers])
150-
payment_details = "\n ".join(
151-
[str(p) for p in self.supplier_payment_details]
152-
)
153-
taxes = "\n ".join(f"{t}" for t in self.taxes)
154-
line_items = "\n"
155-
if self.line_items:
156-
line_items = "\n Code | QTY | Price | Amount | Tax (Rate) | Description\n"
157-
for item in self.line_items:
158-
line_items += f" {item}\n"
159128
return clean_out_string(
160-
"----- Financial Document V1 -----\n"
129+
"-----Financial Document data-----\n"
161130
f"Filename: {self.filename or ''}\n"
162-
f"Document type: {self.document_type}\n"
163-
f"Category: {self.category}\n"
164-
f"Subcategory: {self.subcategory}\n"
165-
f"Locale: {self.locale}\n"
166-
f"Invoice number: {self.invoice_number}\n"
167-
f"Reference numbers: {reference_numbers}\n"
168-
f"Date: {self.date}\n"
169-
f"Due date: {self.due_date}\n"
170-
f"Time: {self.time}\n"
171-
f"Supplier name: {self.supplier_name}\n"
131+
f"Invoice number: {self.invoice_number.value}\n"
132+
f"Total amount including taxes: {self.total_incl.value}\n"
133+
f"Total amount excluding taxes: {self.total_excl.value}\n"
134+
"Date: %s\n"
135+
"Invoice due date: %s\n"
136+
"Supplier name: %s\n"
172137
f"Supplier address: {self.supplier_address}\n"
173-
f"Supplier company registrations: {supplier_company_registrations}\n"
174-
f"Supplier payment details: {payment_details}\n"
175138
f"Customer name: {self.customer_name}\n"
139+
f"Customer company registration: {self.customer_company_registration}\n"
176140
f"Customer address: {self.customer_address}\n"
177-
f"Customer company registrations: {customer_company_registrations}\n"
178-
f"Tip: {self.tip}\n"
179-
f"Taxes: {taxes}\n"
180-
f"Total tax: {self.total_tax}\n"
181-
f"Total net: {self.total_net}\n"
182-
f"Total amount: {self.total_amount}\n"
183-
f"Line Items: {line_items}"
141+
"Taxes: %s\n"
142+
"Total taxes: %s\n"
184143
"----------------------"
144+
% (
145+
self.date.value,
146+
self.due_date.value,
147+
self.merchant_name.value,
148+
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
149+
self.total_tax.value,
150+
)
151+
)
152+
153+
@staticmethod
154+
def request(
155+
endpoints: List[Endpoint],
156+
input_source: InputSource,
157+
include_words: bool = False,
158+
close_file: bool = True,
159+
cropper: bool = False,
160+
):
161+
"""
162+
Make request to prediction endpoint.
163+
164+
:param input_source: Input object
165+
:param endpoints: Endpoints config
166+
:param include_words: Include Mindee vision words in http_response
167+
:param close_file: Whether to `close()` the file after parsing it.
168+
:param cropper: Including Mindee cropper results.
169+
"""
170+
if "pdf" in input_source.file_mimetype:
171+
# invoices is index 0, receipts 1 (this should be cleaned up)
172+
index = 0
173+
else:
174+
index = 1
175+
return endpoints[index].predict_req_post(
176+
input_source, include_words, close_file, cropper=cropper
185177
)
186178

187179
def _checklist(self) -> None:
188-
pass
180+
"""Set the validation rules."""
181+
self.checklist = {"taxes_match_total_incl": self.__taxes_match_total_incl()}
182+
183+
# Checks
184+
def __taxes_match_total_incl(self) -> bool:
185+
"""
186+
Check invoice rule of matching between taxes and total_incl.
187+
188+
:return: True if rule matches, False otherwise
189+
"""
190+
# Check taxes and total_incl exist
191+
if len(self.taxes) == 0 or self.total_incl.value is None:
192+
return False
193+
194+
# Reconstruct total_incl from taxes
195+
total_vat = 0.0
196+
reconstructed_total = 0.0
197+
for tax in self.taxes:
198+
if tax.rate is not None and tax.rate != 0 and tax.value is not None:
199+
total_vat += tax.value
200+
reconstructed_total += tax.value + 100 * tax.value / tax.rate
201+
202+
# Sanity check
203+
if total_vat <= 0:
204+
return False
205+
206+
# Crate epsilon
207+
eps = 1 / (100 * total_vat)
208+
if (
209+
self.total_incl.value * (1 - eps) - 0.02
210+
<= reconstructed_total
211+
<= self.total_incl.value * (1 + eps) + 0.02
212+
):
213+
for tax in self.taxes:
214+
tax.confidence = 1.0
215+
self.total_tax.confidence = 1.0
216+
self.total_incl.confidence = 1.0
217+
return True
218+
return False
189219

190220

191221
TypeFinancialV1 = TypeVar("TypeFinancialV1", bound=FinancialV1)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import json
2+
3+
import pytest
4+
5+
from mindee.documents.financial.financial_document_v1 import FinancialDocumentV1
6+
from tests import FINANCIAL_DOC_DATA_DIR
7+
8+
FILE_PATH_FINANCIAL_DOC_V1_INVOICE = (
9+
f"{FINANCIAL_DOC_DATA_DIR}/response_v1/complete_invoice.json"
10+
)
11+
FILE_PATH_FINANCIAL_DOC_V1_RECEIPT = (
12+
f"{FINANCIAL_DOC_DATA_DIR}/response_v1/complete_receipt.json"
13+
)
14+
FILE_PATH_FINANCIAL_DOC_V1_EMPTY = f"{FINANCIAL_DOC_DATA_DIR}/response_v1/empty.json"
15+
16+
17+
@pytest.fixture
18+
def financial_doc_from_invoice_object():
19+
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_INVOICE))
20+
return FinancialDocumentV1(
21+
api_prediction=json_data["document"]["inference"], page_n=None
22+
)
23+
24+
25+
@pytest.fixture
26+
def financial_doc_from_receipt_object():
27+
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_RECEIPT))
28+
return FinancialDocumentV1(
29+
api_prediction=json_data["document"]["inference"], page_n=None
30+
)
31+
32+
33+
@pytest.fixture
34+
def financial_doc_object_all_na():
35+
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_EMPTY))
36+
return FinancialDocumentV1(
37+
api_prediction=json_data["document"]["inference"]["pages"][0]
38+
)
39+
40+
41+
def test_doc_constructor_invoice(financial_doc_from_invoice_object):
42+
assert financial_doc_from_invoice_object.date.value == "2019-02-11"
43+
assert (
44+
financial_doc_from_invoice_object.supplier_address.value
45+
== "4490 Oak Drive Albany, NY 12210"
46+
)
47+
doc_str = (
48+
open(f"{FINANCIAL_DOC_DATA_DIR}/response_v1/invoice_to_string.txt")
49+
.read()
50+
.strip()
51+
)
52+
assert str(financial_doc_from_invoice_object) == doc_str
53+
54+
55+
def test_doc_constructor_receipt(financial_doc_from_receipt_object):
56+
assert financial_doc_from_receipt_object.date.value == "2014-07-07"
57+
assert financial_doc_from_receipt_object.supplier_address.value is None
58+
doc_str = (
59+
open(f"{FINANCIAL_DOC_DATA_DIR}/response_v1/receipt_to_string.txt")
60+
.read()
61+
.strip()
62+
)
63+
assert str(financial_doc_from_receipt_object) == doc_str
64+
65+
66+
def test_all_na(financial_doc_object_all_na):
67+
assert financial_doc_object_all_na.orientation is None
68+
assert financial_doc_object_all_na.locale.value is None
69+
assert financial_doc_object_all_na.total_amount.value is None
70+
assert financial_doc_object_all_na.date.value is None
71+
assert financial_doc_object_all_na.supplier_name.value is None
72+
assert financial_doc_object_all_na.total_tax.value is None
73+
assert len(financial_doc_object_all_na.taxes) == 0

‎tests/documents/test_financial_v0.py

Lines changed: 0 additions & 285 deletions
This file was deleted.

‎tests/documents/test_financial_v1.py

Lines changed: 254 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,65 +3,283 @@
33
import pytest
44

55
from mindee.documents.financial.financial_v1 import FinancialV1
6-
from tests import FINANCIAL_DOC_DATA_DIR
7-
8-
FILE_PATH_FINANCIAL_DOC_V1_INVOICE = (
9-
f"{FINANCIAL_DOC_DATA_DIR}/response_v1/complete_invoice.json"
6+
from tests.documents.test_invoice_v3 import (
7+
FILE_PATH_INVOICE_V3_COMPLETE,
8+
FILE_PATH_INVOICE_V3_EMPTY,
109
)
11-
FILE_PATH_FINANCIAL_DOC_V1_RECEIPT = (
12-
f"{FINANCIAL_DOC_DATA_DIR}/response_v1/complete_receipt.json"
10+
from tests.documents.test_receipt_v3 import (
11+
FILE_PATH_RECEIPT_V3_COMPLETE,
12+
FILE_PATH_RECEIPT_V3_EMPTY,
1313
)
14-
FILE_PATH_FINANCIAL_DOC_V1_EMPTY = f"{FINANCIAL_DOC_DATA_DIR}/response_v1/empty.json"
1514

1615

1716
@pytest.fixture
1817
def financial_doc_from_invoice_object():
19-
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_INVOICE))
18+
json_data = json.load(open(FILE_PATH_INVOICE_V3_COMPLETE))
2019
return FinancialV1(api_prediction=json_data["document"]["inference"], page_n=None)
2120

2221

2322
@pytest.fixture
2423
def financial_doc_from_receipt_object():
25-
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_RECEIPT))
24+
json_data = json.load(open(FILE_PATH_RECEIPT_V3_COMPLETE))
2625
return FinancialV1(api_prediction=json_data["document"]["inference"], page_n=None)
2726

2827

2928
@pytest.fixture
30-
def financial_doc_object_all_na():
31-
json_data = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_EMPTY))
29+
def financial_doc_from_receipt_object_all_na():
30+
json_data = json.load(open(FILE_PATH_RECEIPT_V3_EMPTY))
3231
return FinancialV1(api_prediction=json_data["document"]["inference"]["pages"][0])
3332

3433

35-
def test_doc_constructor_invoice(financial_doc_from_invoice_object):
36-
assert financial_doc_from_invoice_object.date.value == "2019-02-11"
34+
@pytest.fixture
35+
def financial_doc_from_invoice_object_all_na():
36+
json_data = json.load(open(FILE_PATH_INVOICE_V3_EMPTY))
37+
return FinancialV1(api_prediction=json_data["document"]["inference"]["pages"][0])
38+
39+
40+
@pytest.fixture
41+
def receipt_pred():
42+
return json.load(open(FILE_PATH_RECEIPT_V3_EMPTY))["document"]["inference"][
43+
"pages"
44+
][0]
45+
46+
47+
@pytest.fixture
48+
def invoice_pred():
49+
return json.load(open(FILE_PATH_INVOICE_V3_EMPTY))["document"]["inference"][
50+
"pages"
51+
][0]
52+
53+
54+
def test_constructor_1(financial_doc_from_invoice_object):
55+
assert financial_doc_from_invoice_object.date.value == "2020-02-17"
3756
assert (
3857
financial_doc_from_invoice_object.supplier_address.value
39-
== "4490 Oak Drive Albany, NY 12210"
58+
== "156 University Ave, Toronto ON, Canada M5H 2H7"
4059
)
41-
doc_str = (
42-
open(f"{FINANCIAL_DOC_DATA_DIR}/response_v1/invoice_to_string.txt")
43-
.read()
44-
.strip()
45-
)
46-
assert str(financial_doc_from_invoice_object) == doc_str
4760

4861

49-
def test_doc_constructor_receipt(financial_doc_from_receipt_object):
50-
assert financial_doc_from_receipt_object.date.value == "2014-07-07"
62+
def test_constructor_2(financial_doc_from_receipt_object):
63+
assert financial_doc_from_receipt_object.date.value == "2016-02-26"
5164
assert financial_doc_from_receipt_object.supplier_address.value is None
52-
doc_str = (
53-
open(f"{FINANCIAL_DOC_DATA_DIR}/response_v1/receipt_to_string.txt")
54-
.read()
55-
.strip()
56-
)
57-
assert str(financial_doc_from_receipt_object) == doc_str
5865

5966

60-
def test_all_na(financial_doc_object_all_na):
61-
assert financial_doc_object_all_na.orientation is None
62-
assert financial_doc_object_all_na.locale.value is None
63-
assert financial_doc_object_all_na.total_amount.value is None
64-
assert financial_doc_object_all_na.date.value is None
65-
assert financial_doc_object_all_na.supplier_name.value is None
66-
assert financial_doc_object_all_na.total_tax.value is None
67-
assert len(financial_doc_object_all_na.taxes) == 0
67+
def test_all_na_receipt(financial_doc_from_receipt_object_all_na):
68+
assert financial_doc_from_receipt_object_all_na.orientation is None
69+
assert financial_doc_from_receipt_object_all_na.locale.value is None
70+
assert financial_doc_from_receipt_object_all_na.total_incl.value is None
71+
assert financial_doc_from_receipt_object_all_na.date.value is None
72+
assert financial_doc_from_receipt_object_all_na.merchant_name.value is None
73+
assert financial_doc_from_receipt_object_all_na.total_tax.value is None
74+
assert len(financial_doc_from_receipt_object_all_na.taxes) == 0
75+
76+
77+
def test_all_na_invoice(financial_doc_from_invoice_object_all_na):
78+
assert financial_doc_from_invoice_object_all_na.orientation is None
79+
assert financial_doc_from_invoice_object_all_na.locale.value is None
80+
assert financial_doc_from_invoice_object_all_na.total_incl.value is None
81+
assert financial_doc_from_invoice_object_all_na.date.value is None
82+
assert financial_doc_from_invoice_object_all_na.merchant_name.value is None
83+
assert financial_doc_from_invoice_object_all_na.total_tax.value is None
84+
assert len(financial_doc_from_invoice_object_all_na.taxes) == 0
85+
86+
87+
def test__str__invoice(financial_doc_from_invoice_object):
88+
assert type(financial_doc_from_invoice_object.__str__()) == str
89+
90+
91+
def test__str__receipt(financial_doc_from_receipt_object):
92+
assert type(financial_doc_from_receipt_object.__str__()) == str
93+
94+
95+
# Business tests from receipt
96+
def test__receipt_reconstruct_total_excl_from_total_and_taxes_1(receipt_pred):
97+
# no incl implies no reconstruct for total excl
98+
receipt_pred["prediction"]["total_incl"] = {"value": "N/A", "confidence": 0.0}
99+
receipt_pred["prediction"]["taxes"] = [
100+
{"rate": 20, "value": 9.5, "confidence": 0.9}
101+
]
102+
financial_doc = FinancialV1(receipt_pred)
103+
assert financial_doc.total_excl.value is None
104+
105+
106+
def test__receipt_reconstruct_total_excl_from_total_and_taxes_2(receipt_pred):
107+
# without taxes, total_excl cannot be reconstructed
108+
receipt_pred["prediction"]["total_incl"] = {"value": 12.54, "confidence": 0.0}
109+
receipt_pred["prediction"]["taxes"] = []
110+
financial_doc = FinancialV1(receipt_pred)
111+
assert financial_doc.total_excl.value is None
112+
113+
114+
def test__receipt_reconstruct_total_excl_from_total_and_taxes_3(receipt_pred):
115+
# working example
116+
receipt_pred["prediction"]["total_incl"] = {"value": 12.54, "confidence": 0.5}
117+
receipt_pred["prediction"]["taxes"] = [
118+
{"rate": 20, "value": 0.5, "confidence": 0.1},
119+
{"rate": 10, "value": 4.25, "confidence": 0.6},
120+
]
121+
financial_doc = FinancialV1(receipt_pred)
122+
assert financial_doc.total_excl.confidence == 0.03
123+
assert financial_doc.total_excl.value == 7.79
124+
125+
126+
def test__receipt_reconstruct_total_tax_1(receipt_pred):
127+
# without taxes, total_tax cannot be reconstructed
128+
receipt_pred["prediction"]["taxes"] = []
129+
financial_doc = FinancialV1(receipt_pred)
130+
assert financial_doc.total_tax.value is None
131+
132+
133+
def test__receipt_reconstruct_total_tax_2(receipt_pred):
134+
# working example
135+
receipt_pred["prediction"]["taxes"] = [
136+
{"rate": 20, "value": 10.2, "confidence": 0.5},
137+
{"rate": 10, "value": 40.0, "confidence": 0.1},
138+
]
139+
financial_doc = FinancialV1(receipt_pred)
140+
assert financial_doc.total_tax.value == 50.2
141+
assert financial_doc.total_tax.confidence == 0.05
142+
143+
144+
def test__receipt_taxes_match_total_incl_1(receipt_pred):
145+
# matching example
146+
receipt_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
147+
receipt_pred["prediction"]["taxes"] = [
148+
{"rate": 20, "value": 10.99, "confidence": 0.5},
149+
{"rate": 10, "value": 40.12, "confidence": 0.1},
150+
]
151+
financial_doc = FinancialV1(receipt_pred)
152+
assert financial_doc.checklist["taxes_match_total_incl"] is True
153+
assert financial_doc.total_incl.confidence == 1.0
154+
for tax in financial_doc.taxes:
155+
assert tax.confidence == 1.0
156+
157+
158+
def test__receipt_taxes_match_total_incl_2(receipt_pred):
159+
# not matching example with close error
160+
receipt_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
161+
receipt_pred["prediction"]["taxes"] = [
162+
{"rate": 20, "value": 10.9, "confidence": 0.5},
163+
{"rate": 10, "value": 40.12, "confidence": 0.1},
164+
]
165+
financial_doc = FinancialV1(receipt_pred)
166+
assert financial_doc.checklist["taxes_match_total_incl"] is False
167+
168+
169+
def test__receipt_taxes_match_total_incl_3(receipt_pred):
170+
# sanity check with a zero-value tax
171+
receipt_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
172+
receipt_pred["prediction"]["taxes"] = [
173+
{"rate": 20, "value": 0.0, "confidence": 0.5}
174+
]
175+
financial_doc = FinancialV1(receipt_pred)
176+
assert financial_doc.checklist["taxes_match_total_incl"] is False
177+
178+
179+
# Business tests from invoice
180+
def test__invoice_reconstruct_total_excl_from_total_and_taxes_1(invoice_pred):
181+
# without total_incl, total_excl cannot be reconstructed
182+
invoice_pred["prediction"]["total_incl"] = {"amount": "N/A", "confidence": 0.0}
183+
invoice_pred["prediction"]["taxes"] = [
184+
{"rate": 20, "amount": 9.5, "confidence": 0.9}
185+
]
186+
financial_doc = FinancialV1(invoice_pred)
187+
assert financial_doc.total_excl.value is None
188+
189+
190+
def test__invoice_reconstruct_total_excl_from_total_and_taxes_2(invoice_pred):
191+
# without taxes, total_excl cannot be reconstructed
192+
invoice_pred["prediction"]["total_incl"] = {"amount": 12.54, "confidence": 0.0}
193+
invoice_pred["prediction"]["taxes"] = []
194+
financial_doc = FinancialV1(invoice_pred)
195+
assert financial_doc.total_excl.value is None
196+
197+
198+
def test__invoice_reconstruct_total_excl_from_total_and_taxes_3(invoice_pred):
199+
# working example
200+
invoice_pred["prediction"]["total_incl"] = {"value": 12.54, "confidence": 0.5}
201+
invoice_pred["prediction"]["taxes"] = [
202+
{"rate": 20, "value": 0.5, "confidence": 0.1},
203+
{"rate": 10, "value": 4.25, "confidence": 0.6},
204+
]
205+
financial_doc = FinancialV1(invoice_pred)
206+
assert financial_doc.total_excl.confidence == 0.03
207+
assert financial_doc.total_excl.value == 7.79
208+
209+
210+
def test__invoice_reconstruct_total_tax_1(invoice_pred):
211+
# without taxes, total_tax cannot be reconstructed
212+
invoice_pred["prediction"]["taxes"] = []
213+
financial_doc = FinancialV1(invoice_pred)
214+
assert financial_doc.total_tax.value is None
215+
216+
217+
def test__invoice_reconstruct_total_tax_2(invoice_pred):
218+
# working example
219+
invoice_pred["prediction"]["taxes"] = [
220+
{"rate": 20, "value": 10.2, "confidence": 0.5},
221+
{"rate": 10, "value": 40.0, "confidence": 0.1},
222+
]
223+
financial_doc = FinancialV1(invoice_pred)
224+
assert financial_doc.total_tax.value == 50.2
225+
assert financial_doc.total_tax.confidence == 0.05
226+
227+
228+
def test__invoice_taxes_match_total_incl_1(invoice_pred):
229+
# matching example
230+
invoice_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
231+
invoice_pred["prediction"]["taxes"] = [
232+
{"rate": 20, "value": 10.99, "confidence": 0.5},
233+
{"rate": 10, "value": 40.12, "confidence": 0.1},
234+
]
235+
financial_doc = FinancialV1(invoice_pred)
236+
assert financial_doc.checklist["taxes_match_total_incl"] is True
237+
assert financial_doc.total_incl.confidence == 1.0
238+
for tax in financial_doc.taxes:
239+
assert tax.confidence == 1.0
240+
241+
242+
def test__invoice_taxes_match_total_incl_2(invoice_pred):
243+
# not matching example with close error
244+
invoice_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
245+
invoice_pred["prediction"]["taxes"] = [
246+
{"rate": 20, "value": 10.9, "confidence": 0.5},
247+
{"rate": 10, "value": 40.12, "confidence": 0.1},
248+
]
249+
financial_doc = FinancialV1(invoice_pred)
250+
assert financial_doc.checklist["taxes_match_total_incl"] is False
251+
252+
253+
def test__invoice_taxes_match_total_incl_3(invoice_pred):
254+
# sanity check with a zero-value tax
255+
invoice_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
256+
invoice_pred["prediction"]["taxes"] = [
257+
{"rate": 20, "value": 0.0, "confidence": 0.5}
258+
]
259+
financial_doc = FinancialV1(invoice_pred)
260+
assert financial_doc.checklist["taxes_match_total_incl"] is False
261+
262+
263+
def test__shouldnt_raise_when_tax_rate_none(invoice_pred):
264+
# sanity check: a tax rate of "N/A" must not raise
265+
invoice_pred["prediction"]["total_incl"] = {"value": 507.25, "confidence": 0.6}
266+
invoice_pred["prediction"]["taxes"] = [
267+
{"rate": "N/A", "value": 0.0, "confidence": 0.5}
268+
]
269+
financial_doc = FinancialV1(invoice_pred)
270+
assert financial_doc.checklist["taxes_match_total_incl"] is False
271+
272+
273+
def test_invoice_or_receipt_get_same_field_types(receipt_pred, invoice_pred):
274+
financial_doc_from_receipt = FinancialV1(receipt_pred)
275+
financial_doc_from_invoice = FinancialV1(invoice_pred)
276+
assert set(dir(financial_doc_from_invoice)) == set(dir(financial_doc_from_receipt))
277+
for key in dir(financial_doc_from_receipt):
278+
if key.startswith("_"):
279+
continue
280+
receipt_attr = getattr(financial_doc_from_receipt, key)
281+
invoice_attr = getattr(financial_doc_from_invoice, key)
282+
print(key)
283+
assert isinstance(
284+
receipt_attr, type(invoice_attr)
285+
), f"Types do not match for: {key}"

‎tests/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_cli_receipt(ots_doc):
6767

6868

6969
def test_cli_financial_doc(ots_doc):
70-
ots_doc.product_name = "financial"
70+
ots_doc.product_name = "financial-document"
7171
ots_doc.api_key = ""
7272
with pytest.raises(RuntimeError):
7373
call_endpoint(ots_doc)

‎tests/test_response.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@
44

55
from mindee import Client
66
from mindee.documents.base import Document
7-
from mindee.documents.financial.financial_v1 import FinancialV1
7+
from mindee.documents.financial.financial_document_v1 import FinancialDocumentV1
88
from mindee.documents.invoice.invoice_v3 import InvoiceV3
99
from mindee.documents.passport.passport_v1 import PassportV1
1010
from mindee.documents.receipt.receipt_v3 import ReceiptV3
1111
from mindee.documents.receipt.receipt_v4 import ReceiptV4
1212
from mindee.endpoints import OTS_OWNER
1313
from mindee.input.sources import PathInput
1414
from mindee.response import PredictResponse
15-
from tests.documents.test_financial_v1 import FILE_PATH_FINANCIAL_DOC_V1_RECEIPT
15+
from tests.documents.test_financial_document_v1 import (
16+
FILE_PATH_FINANCIAL_DOC_V1_RECEIPT,
17+
)
1618
from tests.documents.test_invoice_v3 import FILE_PATH_INVOICE_V3_COMPLETE
1719
from tests.documents.test_passport_v1 import FILE_PATH_PASSPORT_V1_COMPLETE
1820
from tests.documents.test_receipt_v3 import FILE_PATH_RECEIPT_V3_COMPLETE
@@ -80,15 +82,15 @@ def test_response_receipt_v4(dummy_file_input, dummy_config):
8082

8183
def test_response_financial_doc_with_receipt(dummy_file_input, dummy_config):
8284
response = json.load(open(FILE_PATH_FINANCIAL_DOC_V1_RECEIPT))
83-
parsed_response = PredictResponse[FinancialV1](
84-
doc_config=dummy_config[(OTS_OWNER, FinancialV1.__name__)],
85+
parsed_response = PredictResponse[FinancialDocumentV1](
86+
doc_config=dummy_config[(OTS_OWNER, FinancialDocumentV1.__name__)],
8587
http_response=response,
8688
input_source=dummy_file_input,
8789
response_ok=True,
8890
)
89-
assert isinstance(parsed_response.document, FinancialV1)
91+
assert isinstance(parsed_response.document, FinancialDocumentV1)
9092
for page in parsed_response.pages:
91-
assert isinstance(page, FinancialV1)
93+
assert isinstance(page, FinancialDocumentV1)
9294

9395

9496
def test_response_passport_v1(dummy_file_input, dummy_config):

0 commit comments

Comments
 (0)
Please sign in to comment.