Invoice2.1 (#8)

* chg: 🔖 prepare v1.2.0
* chg: ✨ updated SDK to new Mindee API
* chg: ✅ updated tests
* chg: ➖ deleted NumPy dependency
* chg: 🙈 added DS_Store to gitignore
* chg: 🙈 deleted DS_Store
mindee · Aug 25, 2021 · 45ebd61 · 45ebd61
1 parent 110f397
commit 45ebd61
Show file tree

Hide file tree

Showing 30 changed files with 1,865 additions and 999 deletions.
diff --git a/.gitignore b/.gitignore
@@ -128,3 +128,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
 /data/
+
+# Mac OS
+.DS_Store
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Mindee python SDK
 
+## v1.2.0 (2021-08-25)
+
+### Chg
+
+* :sparkles: Adapted SDK to the new Mindee API endpoint
+* :zap: Single page object reconstruction is now server-side
+* :heavy_minus_sign: Removed Numpy dependency
+* :white_check_mark: Updated tests with new data
+
 ## v1.1.3 (2020-02-21)
 
 ### Fix

diff --git a/mindee/__init__.py b/mindee/__init__.py
@@ -10,6 +10,14 @@
 from mindee.documents.passport import Passport
 from mindee.benchmark import Benchmark
 
+DOCUMENT_CLASSES = {
+    "receipt": Receipt,
+    "invoice": Invoice,
+    "financial_document": FinancialDocument,
+    "passport": Passport,
+    "license_plate": CarPlate
+}
+
 
 class Client(object):
     def __init__(
@@ -29,7 +37,7 @@ def __init__(
         """
         assert type(raise_on_error) == bool
         self.raise_on_error = raise_on_error
-        self.base_url = "https://api.mindee.net/products/"
+        self.base_url = "https://api.mindee.net/v1/products/mindee/"
         self.expense_receipt_token = expense_receipt_token
         self.invoice_token = invoice_token
         self.passport_token = passport_token
@@ -79,10 +87,11 @@ def _wrap_response(
         :return: Full response object
         """
         dict_response = response.json()
-        if response.status_code != 200 and self.raise_on_error:
+
+        if response.status_code > 201 and self.raise_on_error:
             raise HTTPException(
                 "Receipt API %s HTTP error: %s" % (response.status_code, json.dumps(dict_response)))
-        elif response.status_code != 200:
+        elif response.status_code > 201:
             return Response(
                 http_response=dict_response,
                 pages=[],
@@ -288,55 +297,30 @@ def format_response(json_response, document_type, input_file):
         json_response["filepath"] = input_file.filepath
         json_response["file_extension"] = input_file.file_extension
         pages = []
-        for page_n, page_prediction in enumerate(json_response["predictions"]):
-            if document_type == "receipt":
-                pages.append(
-                    Receipt(
-                        api_prediction=page_prediction,
-                        input_file=input_file,
-                        page_n=page_n
-                    )
-                )
-            elif document_type == "invoice":
-                pages.append(
-                    Invoice(
-                        api_prediction=page_prediction,
-                        input_file=input_file,
-                        page_n=page_n
-                    )
-                )
-            elif document_type == "financial_document":
-                pages.append(
-                    FinancialDocument(
-                        api_prediction=page_prediction,
-                        input_file=input_file,
-                        page_n=page_n
-                    )
-                )
-            elif document_type == "passport":
-                pages.append(
-                    Passport(
-                        api_prediction=page_prediction,
-                        input_file=input_file,
-                        page_n=page_n
-                    )
-                )
-            elif document_type == "license_plate":
-                pages.append(
-                    CarPlate(
-                        api_prediction=page_prediction,
-                        input_file=input_file,
-                        page_n=page_n
-                    )
+
+        if document_type not in DOCUMENT_CLASSES.keys():
+            raise Exception("Document type not supported.")
+
+        # Create page level objects
+        for page_n, page_prediction in enumerate(json_response["document"]["inference"]["pages"]):
+            pages.append(
+                DOCUMENT_CLASSES[document_type](
+                    api_prediction=page_prediction["prediction"],
+                    input_file=input_file,
+                    page_n=page_prediction["id"]
                 )
-            else:
-                raise Exception("Document type not supported.")
+            )
 
-        document = Document.merge_pages(pages)
+        # Create the document level object
+        document_level = DOCUMENT_CLASSES[document_type](
+            api_prediction=json_response["document"]["inference"]["prediction"],
+            input_file=input_file,
+            page_n="-1"
+        )
 
         return Response(
             http_response=json_response,
             pages=pages,
-            document=document,
+            document=document_level,
             document_type=document_type
         )
diff --git a/mindee/documents/__init__.py b/mindee/documents/__init__.py
@@ -1,6 +1,3 @@
-import copy
-
-
 class Document(object):
     def __init__(self, input_file=None):
         self.filepath = None
@@ -24,21 +21,3 @@ def _reconstruct(self, *args):
 
     def all_checks(self):
         return all(self.checklist)
-
-    @staticmethod
-    def merge_pages(page_documents):
-        """
-        :param page_documents: Document object list
-        :return: A single Document where each field is set with the maximum probability field
-        """
-        document = copy.deepcopy(page_documents[0])
-        attributes = [a for a in dir(document)]
-        for doc in page_documents:
-            for attribute in attributes:
-                if not hasattr(getattr(doc, attribute), "probability"):
-                    continue
-
-                if getattr(doc, attribute).probability > getattr(document, attribute).probability:
-                    setattr(document, attribute, getattr(doc, attribute))
-
-        return document
diff --git a/mindee/documents/financial_document.py b/mindee/documents/financial_document.py
@@ -138,18 +138,26 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
             self.company_number = []
 
     def __str__(self):
-        return "-----Financial document-----\n" \
+        return "-----Financial Document data-----\n" \
                "Filename: %s \n" \
-               "Total amount: %s \n" \
+               "Invoice number: %s \n" \
+               "Total amount including taxes: %s \n" \
+               "Total amount excluding taxes: %s \n" \
                "Date: %s\n" \
-               "Merchant name: %s\n" \
+               "Invoice due date: %s\n" \
+               "Supplier name: %s\n" \
+               "Taxes: %s\n" \
                "Total taxes: %s\n" \
                "----------------------" % \
                (
                    self.filename,
+                   self.invoice_number.value,
                    self.total_incl.value,
+                   self.total_excl.value,
                    self.date.value,
+                   self.due_date.value,
                    self.merchant_name.value,
+                   ",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
                    self.total_tax.value
                )
 

diff --git a/mindee/documents/invoice.py b/mindee/documents/invoice.py
@@ -26,7 +26,6 @@ def __init__(
             supplier=None,
             payment_details=None,
             company_number=None,
-            vat_number=None,
             orientation=None,
             total_tax=None,
             page_n=0
@@ -106,7 +105,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
         self.due_date = Date(api_prediction["due_date"], value_key="value", page_n=page_n)
         self.invoice_number = Field(api_prediction["invoice_number"], page_n=page_n)
         self.locale = Locale(api_prediction["locale"], value_key="language", page_n=page_n)
-        self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
+        if str(page_n) != "-1":
+            self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
         self.supplier = Field(api_prediction["supplier"], page_n=page_n)
         self.taxes = [
             Tax(tax_prediction, page_n=page_n, value_key="value") for tax_prediction in api_prediction["taxes"]
@@ -128,6 +128,7 @@ def __str__(self):
                "Total amount including taxes: %s \n" \
                "Total amount excluding taxes: %s \n" \
                "Invoice date: %s\n" \
+               "Invoice due date: %s\n" \
                "Supplier name: %s\n" \
                "Taxes: %s\n" \
                "Total taxes: %s\n" \
@@ -138,6 +139,7 @@ def __str__(self):
                    self.total_incl.value,
                    self.total_excl.value,
                    self.invoice_date.value,
+                   self.due_date.value,
                    self.supplier.value,
                    ",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
                    self.total_tax.value

diff --git a/mindee/documents/receipt.py b/mindee/documents/receipt.py
@@ -117,7 +117,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
         self.taxes = [
             Tax(tax_prediction, page_n=page_n, value_key="value", rate_key="rate", code_key="code")
             for tax_prediction in api_prediction["taxes"]]
-        self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
+        if str(page_n) != "-1":
+            self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
         self.total_tax = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
         self.total_excl = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
 

diff --git a/mindee/fields/__init__.py b/mindee/fields/__init__.py
@@ -27,8 +27,8 @@ def __init__(
             else:
                 self.probability = 0.
 
-            if "segmentation" in abstract_prediction:
-                self.bbox = abstract_prediction["segmentation"]["bounding_box"]
+            if "polygon" in abstract_prediction:
+                self.bbox = abstract_prediction["polygon"]
             else:
                 self.bbox = []
 

diff --git a/mindee/http.py b/mindee/http.py
@@ -11,7 +11,7 @@ def request(url, input_file, token, include_words=False):
     """
     input_file.file_object.seek(0)
 
-    files = {"file": input_file.file_object.read()}
+    files = {"document": input_file.file_object.read()}
 
     headers = {"X-Inferuser-Token": token}
 
@@ -20,7 +20,7 @@ def request(url, input_file, token, include_words=False):
         params["include_mvision"] = "true"
 
     response = requests.post(
-        url+"?include_mvision=True",
+        url,
         files=files,
         headers=headers,
         data=params

diff --git a/mindee/plots.py b/mindee/plots.py
@@ -1,4 +1,3 @@
-import numpy as np
 import matplotlib.pyplot as plt
 
 
@@ -17,30 +16,35 @@ def autolabel(ax, rects):
                     ha='center', va='bottom', rotation=90)
 
 
-def plot_metrics(metrics, accuracies, precisions, save_path):
+def plot_metrics(metrics, accuracies, precisions, save_path, savefig=True):
     """
+    :param savefig: Boolean specifying whether to save the plot to save_path
     :param metrics: List of metrics names
     :param accuracies: List of accuracy values
     :param precisions: List of precision values
     :param save_path: Path to save the figure
-    :return: (void) plot the precision and accuracy bar charts and save the figure in save_path
+    :return: the matplotlib.pyplot module (plt) holding the drawn figure
     """
-    x = np.arange(len(metrics))  # the label locations
+    x_range = [float(k) for k in range(len(metrics))]  # the label locations
     width = 0.4  # the width of the bars
 
     fig, ax = plt.subplots()
     fig.subplots_adjust(bottom=0.15)
-    rects1 = ax.bar(x - width / 2, accuracies, width, color='#fd3246', label='Accuracy')
-    rects2 = ax.bar(x + width / 2, precisions, width, color='#007af9', label='Precision')
+    rects1 = ax.bar([x - width / 2 for x in x_range], accuracies, width, color='#fd3246', label='Accuracy')
+    rects2 = ax.bar([x + width / 2 for x in x_range], precisions, width, color='#007af9', label='Precision')
 
     autolabel(ax, rects1)
     autolabel(ax, rects2)
+
     # Add some text for labels, title and custom x-axis tick labels, etc.
     ax.set_ylabel('%')
     ax.set_title('Metrics')
-    ax.set_xticks(x)
+    ax.set_xticks(x_range)
     ax.set_xticklabels(metrics, rotation=45, fontsize=6)
     ax.legend(loc='lower left')
     plt.grid(True, linestyle='--', color='#e1e1e1', alpha=0.4)
 
-    plt.savefig(save_path, dpi=300)
+    if savefig:
+        plt.savefig(save_path, dpi=300)
+
+    return plt
diff --git a/requirements.txt b/requirements.txt
@@ -2,5 +2,4 @@ requests~=2.23.0
 pytz~=2021.1
 setuptools~=49.2.0
 matplotlib~=3.1.2
-numpy~=1.18.5
 PyMuPDF~=1.18.6