Skip to content

Commit

Permalink
Invoice2.1 (#8)
Browse files Browse the repository at this point in the history
* chg: 🔖 prepare V1.2.0

* chg: ✨ updated SDK to new Mindee API

* chg: ✅ Updated tests

* chg: ➖ Deleted Numpy dependency

* chg: 🙈 added DS_Store to gitignore

* chg: 🙈 deleted DS_Store
  • Loading branch information
jonathanMindee authored Aug 25, 2021
1 parent 110f397 commit 45ebd61
Show file tree
Hide file tree
Showing 30 changed files with 1,865 additions and 999 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,6 @@ dmypy.json
# Pyre type checker
.pyre/
/data/

# Mac OS
.DS_Store
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Mindee python SDK

## v1.2.0 (2021-08-25)

### Chg

* :sparkles: Adapted SDK to the new Mindee API endpoint
* :zap: Single page object reconstruction is now server-side
* :heavy_minus_sign: Removed Numpy dependency
* :white_check_mark: Updated tests with new data

## v1.1.3 (2020-02-21)

### Fix
Expand Down
78 changes: 31 additions & 47 deletions mindee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
from mindee.documents.passport import Passport
from mindee.benchmark import Benchmark

# Registry mapping each supported document_type string to the document class
# that format_response instantiates for it (once per page and once for the
# document-level prediction). A document_type missing from this mapping is
# rejected with "Document type not supported.".
DOCUMENT_CLASSES = {
"receipt": Receipt,
"invoice": Invoice,
"financial_document": FinancialDocument,
"passport": Passport,
"license_plate": CarPlate
}


class Client(object):
def __init__(
Expand All @@ -29,7 +37,7 @@ def __init__(
"""
assert type(raise_on_error) == bool
self.raise_on_error = raise_on_error
self.base_url = "https://api.mindee.net/products/"
self.base_url = "https://api.mindee.net/v1/products/mindee/"
self.expense_receipt_token = expense_receipt_token
self.invoice_token = invoice_token
self.passport_token = passport_token
Expand Down Expand Up @@ -79,10 +87,11 @@ def _wrap_response(
:return: Full response object
"""
dict_response = response.json()
if response.status_code != 200 and self.raise_on_error:

if response.status_code > 201 and self.raise_on_error:
raise HTTPException(
"Receipt API %s HTTP error: %s" % (response.status_code, json.dumps(dict_response)))
elif response.status_code != 200:
elif response.status_code > 201:
return Response(
http_response=dict_response,
pages=[],
Expand Down Expand Up @@ -288,55 +297,30 @@ def format_response(json_response, document_type, input_file):
json_response["filepath"] = input_file.filepath
json_response["file_extension"] = input_file.file_extension
pages = []
for page_n, page_prediction in enumerate(json_response["predictions"]):
if document_type == "receipt":
pages.append(
Receipt(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "invoice":
pages.append(
Invoice(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "financial_document":
pages.append(
FinancialDocument(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "passport":
pages.append(
Passport(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)
)
elif document_type == "license_plate":
pages.append(
CarPlate(
api_prediction=page_prediction,
input_file=input_file,
page_n=page_n
)

if document_type not in DOCUMENT_CLASSES.keys():
raise Exception("Document type not supported.")

# Create page level objects
for page_n, page_prediction in enumerate(json_response["document"]["inference"]["pages"]):
pages.append(
DOCUMENT_CLASSES[document_type](
api_prediction=page_prediction["prediction"],
input_file=input_file,
page_n=page_prediction["id"]
)
else:
raise Exception("Document type not supported.")
)

document = Document.merge_pages(pages)
# Create the document level object
document_level = DOCUMENT_CLASSES[document_type](
api_prediction=json_response["document"]["inference"]["prediction"],
input_file=input_file,
page_n="-1"
)

return Response(
http_response=json_response,
pages=pages,
document=document,
document=document_level,
document_type=document_type
)
21 changes: 0 additions & 21 deletions mindee/documents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import copy


class Document(object):
def __init__(self, input_file=None):
self.filepath = None
Expand All @@ -24,21 +21,3 @@ def _reconstruct(self, *args):

def all_checks(self):
return all(self.checklist)

@staticmethod
def merge_pages(page_documents):
"""
:param page_documents: Document object list
:return: A single Document where each field is set with the maximum probability field
"""
document = copy.deepcopy(page_documents[0])
attributes = [a for a in dir(document)]
for doc in page_documents:
for attribute in attributes:
if not hasattr(getattr(doc, attribute), "probability"):
continue

if getattr(doc, attribute).probability > getattr(document, attribute).probability:
setattr(document, attribute, getattr(doc, attribute))

return document
14 changes: 11 additions & 3 deletions mindee/documents/financial_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,26 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
self.company_number = []

def __str__(self):
return "-----Financial document-----\n" \
return "-----Financial Document data-----\n" \
"Filename: %s \n" \
"Total amount: %s \n" \
"Invoice number: %s \n" \
"Total amount including taxes: %s \n" \
"Total amount excluding taxes: %s \n" \
"Date: %s\n" \
"Merchant name: %s\n" \
"Invoice due date: %s\n" \
"Supplier name: %s\n" \
"Taxes: %s\n" \
"Total taxes: %s\n" \
"----------------------" % \
(
self.filename,
self.invoice_number.value,
self.total_incl.value,
self.total_excl.value,
self.date.value,
self.due_date.value,
self.merchant_name.value,
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
self.total_tax.value
)

Expand Down
6 changes: 4 additions & 2 deletions mindee/documents/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def __init__(
supplier=None,
payment_details=None,
company_number=None,
vat_number=None,
orientation=None,
total_tax=None,
page_n=0
Expand Down Expand Up @@ -106,7 +105,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
self.due_date = Date(api_prediction["due_date"], value_key="value", page_n=page_n)
self.invoice_number = Field(api_prediction["invoice_number"], page_n=page_n)
self.locale = Locale(api_prediction["locale"], value_key="language", page_n=page_n)
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
if str(page_n) != "-1":
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
self.supplier = Field(api_prediction["supplier"], page_n=page_n)
self.taxes = [
Tax(tax_prediction, page_n=page_n, value_key="value") for tax_prediction in api_prediction["taxes"]
Expand All @@ -128,6 +128,7 @@ def __str__(self):
"Total amount including taxes: %s \n" \
"Total amount excluding taxes: %s \n" \
"Invoice date: %s\n" \
"Invoice due date: %s\n" \
"Supplier name: %s\n" \
"Taxes: %s\n" \
"Total taxes: %s\n" \
Expand All @@ -138,6 +139,7 @@ def __str__(self):
self.total_incl.value,
self.total_excl.value,
self.invoice_date.value,
self.due_date.value,
self.supplier.value,
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
self.total_tax.value
Expand Down
3 changes: 2 additions & 1 deletion mindee/documents/receipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
self.taxes = [
Tax(tax_prediction, page_n=page_n, value_key="value", rate_key="rate", code_key="code")
for tax_prediction in api_prediction["taxes"]]
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
if str(page_n) != "-1":
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
self.total_tax = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
self.total_excl = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)

Expand Down
4 changes: 2 additions & 2 deletions mindee/fields/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def __init__(
else:
self.probability = 0.

if "segmentation" in abstract_prediction:
self.bbox = abstract_prediction["segmentation"]["bounding_box"]
if "polygon" in abstract_prediction:
self.bbox = abstract_prediction["polygon"]
else:
self.bbox = []

Expand Down
4 changes: 2 additions & 2 deletions mindee/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def request(url, input_file, token, include_words=False):
"""
input_file.file_object.seek(0)

files = {"file": input_file.file_object.read()}
files = {"document": input_file.file_object.read()}

headers = {"X-Inferuser-Token": token}

Expand All @@ -20,7 +20,7 @@ def request(url, input_file, token, include_words=False):
params["include_mvision"] = "true"

response = requests.post(
url+"?include_mvision=True",
url,
files=files,
headers=headers,
data=params
Expand Down
20 changes: 12 additions & 8 deletions mindee/plots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
import matplotlib.pyplot as plt


Expand All @@ -17,30 +16,35 @@ def autolabel(ax, rects):
ha='center', va='bottom', rotation=90)


def plot_metrics(metrics, accuracies, precisions, save_path):
def plot_metrics(metrics, accuracies, precisions, save_path, savefig=True):
"""
:param savefig: Boolean to specify whether saving the plot as a png file or not
:param metrics: List of metrics names
:param accuracies: List of accuracy values
:param precisions: List of precision values
:param save_path: Path to save the figure
:return: (void) plot the precision and accuracy bar charts and save the figure in save_path
:return: the plt object
"""
x = np.arange(len(metrics)) # the label locations
x_range = [float(k) for k in range(len(metrics))] # the label locations
width = 0.4 # the width of the bars

fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.15)
rects1 = ax.bar(x - width / 2, accuracies, width, color='#fd3246', label='Accuracy')
rects2 = ax.bar(x + width / 2, precisions, width, color='#007af9', label='Precision')
rects1 = ax.bar([x - width / 2 for x in x_range], accuracies, width, color='#fd3246', label='Accuracy')
rects2 = ax.bar([x + width / 2 for x in x_range], precisions, width, color='#007af9', label='Precision')

autolabel(ax, rects1)
autolabel(ax, rects2)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('%')
ax.set_title('Metrics')
ax.set_xticks(x)
ax.set_xticks(x_range)
ax.set_xticklabels(metrics, rotation=45, fontsize=6)
ax.legend(loc='lower left')
plt.grid(True, linestyle='--', color='#e1e1e1', alpha=0.4)

plt.savefig(save_path, dpi=300)
if savefig:
plt.savefig(save_path, dpi=300)

return plt
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ requests~=2.23.0
pytz~=2021.1
setuptools~=49.2.0
matplotlib~=3.1.2
numpy~=1.18.5
PyMuPDF~=1.18.6
Loading

0 comments on commit 45ebd61

Please sign in to comment.