Skip to content

Commit 8a6425a

Browse files
🔧 rework custom internals
1 parent 99ac17b commit 8a6425a

File tree

17 files changed

+214
-74
lines changed

17 files changed

+214
-74
lines changed

docs/extras/code_samples/custom_v1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ from mindee import Client, PredictResponse, product
33
# Init a new client
44
mindee_client = Client(api_key="my-api-key")
55

6-
custom_endpoint = mindee_client.create_endpoint("field_test", "solution-eng-tests")
6+
custom_endpoint = mindee_client.create_endpoint("my-endpoint", "my-account")
77

88
# Load a file from disk
99
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

docs/extras/guide/custom_v1.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ The **columns_to_line_items()** function can be called from the document and pag
9999

100100
It takes the following arguments:
101101

102-
* **anchor_names** (`List[str]`): a list of the names of possible anchor (field) candidate for the horizontal placement a line. If all provided anchors are invalid, the `LineItemV1` won't be built.
102+
* **anchor_names** (`List[str]`): a list of the names of possible anchor (field) candidates for the horizontal placement of a line. If all provided anchors are invalid, the `CustomLine` won't be built.
103103
* **field_names** (`List[str]`): a list of fields to retrieve the values from
104104
* **height_tolerance** (`float`): Optional, the height tolerance used to build the line. It helps when the height of a line can vary unexpectedly.
105105

@@ -121,14 +121,14 @@ response.document.pages[0].prediction.columns_to_line_items(
121121
)
122122
```
123123

124-
It returns a list of [CustomLineV1](#CustomlineV1) objects.
124+
It returns a list of [CustomLine](#CustomLine) objects.
125125

126-
## CustomlineV1
126+
## CustomLine
127127

128-
`CustomlineV1` represents a line as it has been read from column fields. It has the following attributes:
128+
`CustomLine` represents a line as it has been read from column fields. It has the following attributes:
129129

130130
* **row_number** (`int`): Number of a given line. Starts at 1.
131-
* **fields** (`Dict[str, ListFieldValueV1]`[]): List of the fields associated with the line, indexed by their column name.
131+
* **fields** (`Dict[str, ListFieldValue]`): Dictionary of the fields associated with the line, indexed by their column name.
132132
* **bbox** (`BBox`): Simple bounding box of the current line representing the 4 minimum & maximum coordinates as `float` values.
133133

134134

docs/parsing/custom.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,26 @@ Custom Fields
44

55
Classification
66
==============
7-
.. autoclass:: mindee.parsing.custom.classification.ClassificationFieldV1
7+
.. autoclass:: mindee.parsing.custom.classification.ClassificationField
88
:members:
99

1010

1111
Line Items
1212
==========
13-
.. autoclass:: mindee.parsing.custom.line_items.CustomLineV1
13+
.. autoclass:: mindee.parsing.custom.line_items.CustomLine
1414
:members:
1515

1616
Lists
1717
=====
1818

1919
List Field
2020
----------
21-
.. autoclass:: mindee.parsing.custom.list.ListFieldV1
21+
.. autoclass:: mindee.parsing.custom.list.ListField
2222
:members:
2323

2424
List Field Value
2525
----------------
26-
.. autoclass:: mindee.parsing.custom.list.ListFieldValueV1
26+
.. autoclass:: mindee.parsing.custom.list.ListFieldValue
2727
:members:
2828

2929
String Dict

mindee/error/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from mindee.error.geometry_error import GeometryError
22
from mindee.error.mimetype_error import MimeTypeError
3-
from mindee.error.mindee_error import MindeeClientError, MindeeError
3+
from mindee.error.mindee_error import MindeeClientError, MindeeError, MindeeProductError
44
from mindee.error.mindee_http_error import (
55
MindeeHTTPClientError,
66
MindeeHTTPError,

mindee/error/mindee_error.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@ class MindeeApiError(MindeeError):
1616

1717
class MindeeSourceError(MindeeError):
1818
"""An exception relating to document loading."""
19+
20+
21+
class MindeeProductError(MindeeApiError):
22+
"""An exception relating to the use of an incorrect product/version."""

mindee/input/sources.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class LocalInputSource:
4545
filename: str
4646
file_mimetype: str
4747
input_type: InputType
48-
filepath: Optional[str] = None
48+
filepath: Optional[str]
4949

5050
def __init__(self, input_type: InputType):
5151
self.input_type = input_type

mindee/parsing/common/job.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class Job:
1414
"""ID of the job sent by the API in response to an enqueue request."""
1515
issued_at: datetime
1616
"""Timestamp of the request reception by the API."""
17-
available_at: Optional[datetime] = None
17+
available_at: Optional[datetime]
1818
"""Timestamp of the request after it has been completed."""
1919
status: str
2020
"""Status of the request, as seen by the API."""
@@ -30,6 +30,8 @@ def __init__(self, json_response: dict) -> None:
3030
self.issued_at = datetime.fromisoformat(json_response["issued_at"])
3131
if json_response.get("available_at"):
3232
self.available_at = datetime.fromisoformat(json_response["available_at"])
33+
else:
34+
self.available_at = None
3335
self.id = json_response["id"]
3436
self.status = json_response["status"]
3537
if self.available_at:

mindee/parsing/custom/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from mindee.parsing.custom.classification import ClassificationFieldV1
2-
from mindee.parsing.custom.line_items import CustomLineV1, get_line_items
3-
from mindee.parsing.custom.list import ListFieldV1, ListFieldValueV1
1+
from mindee.parsing.custom.classification import ClassificationField
2+
from mindee.parsing.custom.line_items import CustomLine, get_line_items
3+
from mindee.parsing.custom.list import ListField, ListFieldValue

mindee/parsing/custom/classification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from mindee.parsing.common.string_dict import StringDict
22

33

4-
class ClassificationFieldV1:
4+
class ClassificationField:
55
"""A classification field."""
66

77
value: str

mindee/parsing/custom/line_items.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
from mindee.geometry.bbox import BBox, extend_bbox, get_bbox
55
from mindee.geometry.minmax import MinMax, get_min_max_y
66
from mindee.geometry.quadrilateral import get_bounding_box
7-
from mindee.parsing.custom.list import ListFieldV1, ListFieldValueV1
7+
from mindee.parsing.custom.list import ListField, ListFieldValue
88

99

10-
def _find_best_anchor(anchors: Sequence[str], fields: Dict[str, ListFieldV1]) -> str:
10+
def _find_best_anchor(anchors: Sequence[str], fields: Dict[str, ListField]) -> str:
1111
"""
1212
Find the anchor with the most rows, in the order specified by `anchors`.
1313

@@ -23,12 +23,12 @@ def _find_best_anchor(anchors: Sequence[str], fields: Dict[str, ListFieldV1]) ->
2323
return anchor
2424

2525

26-
class CustomLineV1:
26+
class CustomLine:
2727
"""Represent a single line."""
2828

2929
row_number: int
3030
"""Index of the row of a given line."""
31-
fields: Dict[str, ListFieldValueV1]
31+
fields: Dict[str, ListFieldValue]
3232
"""Fields contained in the line."""
3333
bbox: BBox
3434
"""Simplified bounding box of the line."""
@@ -38,7 +38,7 @@ def __init__(self, row_number: int):
3838
self.bbox = BBox(1, 1, 0, 0)
3939
self.fields = {}
4040

41-
def update_field(self, field_name: str, field_value: ListFieldValueV1) -> None:
41+
def update_field(self, field_name: str, field_value: ListFieldValue) -> None:
4242
"""
4343
Updates a field value if it exists.
4444

@@ -61,7 +61,7 @@ def update_field(self, field_name: str, field_value: ListFieldValueV1) -> None:
6161
merged_confidence = field_value.confidence
6262
merged_polygon = get_bounding_box(field_value.polygon)
6363

64-
self.fields[field_name] = ListFieldValueV1(
64+
self.fields[field_name] = ListFieldValue(
6565
{
6666
"content": merged_content,
6767
"confidence": merged_confidence,
@@ -70,9 +70,7 @@ def update_field(self, field_name: str, field_value: ListFieldValueV1) -> None:
7070
)
7171

7272

73-
def is_box_in_line(
74-
line: CustomLineV1, bbox: BBox, height_line_tolerance: float
75-
) -> bool:
73+
def is_box_in_line(line: CustomLine, bbox: BBox, height_line_tolerance: float) -> bool:
7674
"""
7775
Checks if the bbox fits inside the line.
7876

@@ -86,25 +84,25 @@ def is_box_in_line(
8684

8785

8886
def prepare(
89-
anchor_name: str, fields: Dict[str, ListFieldV1], height_line_tolerance: float
90-
) -> List[CustomLineV1]:
87+
anchor_name: str, fields: Dict[str, ListField], height_line_tolerance: float
88+
) -> List[CustomLine]:
9189
"""
9290
Prepares lines before filling them.
9391

9492
:param anchor_name: name of the anchor.
9593
:param fields: fields to build lines from.
9694
:param height_line_tolerance: line height tolerance for custom line reconstruction.
9795
"""
98-
lines_prepared: List[CustomLineV1] = []
96+
lines_prepared: List[CustomLine] = []
9997
try:
100-
anchor_field: ListFieldV1 = fields[anchor_name]
98+
anchor_field: ListField = fields[anchor_name]
10199
except KeyError as exc:
102100
raise MindeeError("No lines have been detected.") from exc
103101

104102
current_line_number: int = 1
105-
current_line = CustomLineV1(current_line_number)
103+
current_line = CustomLine(current_line_number)
106104
if anchor_field and len(anchor_field.values) > 0:
107-
current_value: ListFieldValueV1 = anchor_field.values[0]
105+
current_value: ListFieldValue = anchor_field.values[0]
108106
current_line.bbox = extend_bbox(
109107
current_line.bbox,
110108
current_value.polygon,
@@ -118,7 +116,7 @@ def prepare(
118116
):
119117
lines_prepared.append(current_line)
120118
current_line_number += 1
121-
current_line = CustomLineV1(current_line_number)
119+
current_line = CustomLine(current_line_number)
122120
current_line.bbox = extend_bbox(
123121
current_line.bbox,
124122
current_value.polygon,
@@ -140,26 +138,26 @@ def prepare(
140138
def get_line_items(
141139
anchors: Sequence[str],
142140
field_names: Sequence[str],
143-
fields: Dict[str, ListFieldV1],
141+
fields: Dict[str, ListField],
144142
height_line_tolerance: float = 0.01,
145-
) -> List[CustomLineV1]:
143+
) -> List[CustomLine]:
146144
"""
147145
Reconstruct line items from fields.
148146

149147
:anchors: Possible fields to use as an anchor
150148
:columns: All fields which are columns
151149
:fields: List of field names to reconstruct table with
152150
"""
153-
line_items: List[CustomLineV1] = []
154-
fields_to_transform: Dict[str, ListFieldV1] = {}
151+
line_items: List[CustomLine] = []
152+
fields_to_transform: Dict[str, ListField] = {}
155153
for field_name, field_value in fields.items():
156154
if field_name in field_names:
157155
fields_to_transform[field_name] = field_value
158156
anchor = _find_best_anchor(anchors, fields_to_transform)
159157
if not anchor:
160158
print(Warning("Could not find an anchor!"))
161159
return line_items
162-
lines_prepared: List[CustomLineV1] = prepare(
160+
lines_prepared: List[CustomLine] = prepare(
163161
anchor, fields_to_transform, height_line_tolerance
164162
)
165163

mindee/parsing/custom/list.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,35 @@
44
from mindee.parsing.standard.base import FieldPositionMixin
55

66

7-
class ListFieldValueV1(FieldPositionMixin):
7+
class ListFieldValue(FieldPositionMixin):
88
"""A single value or word."""
99

1010
content: str
1111
"""The content text"""
1212
confidence: float
1313
"""Confidence score"""
14+
page_id: Optional[int]
1415

15-
def __init__(self, raw_prediction: StringDict) -> None:
16+
def __init__(
17+
self, raw_prediction: StringDict, page_id: Optional[int] = None
18+
) -> None:
1619
self.content = raw_prediction["content"]
1720
self.confidence = raw_prediction["confidence"]
21+
self.page_id = page_id
1822
self._set_position(raw_prediction)
1923

2024
def __str__(self) -> str:
2125
return self.content
2226

2327

24-
class ListFieldV1:
28+
class ListField:
2529
"""A list of values or words."""
2630

2731
confidence: float
2832
"""Confidence score"""
2933
reconstructed: bool
3034
"""Whether the field was reconstructed from other fields."""
31-
page_id: Optional[int]
32-
"""The document page on which the information was found."""
33-
values: List[ListFieldValueV1]
35+
values: List[ListFieldValue]
3436
"""List of word values"""
3537

3638
def __init__(
@@ -43,15 +45,9 @@ def __init__(
4345
self.reconstructed = reconstructed
4446

4547
for value in raw_prediction["values"]:
46-
self.values.append(ListFieldValueV1(value))
4748
if "page_id" in value:
4849
page_id = value["page_id"]
49-
50-
if page_id is None:
51-
if "page_id" in raw_prediction:
52-
self.page_id = raw_prediction["page_id"]
53-
else:
54-
self.page_id = page_id
50+
self.values.append(ListFieldValue(value, page_id))
5551
self.confidence = raw_prediction["confidence"]
5652

5753
@property

mindee/product/custom/custom_v1_document.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
from typing import Dict, List
22

33
from mindee.parsing.common import Prediction, StringDict, clean_out_string
4-
from mindee.parsing.custom import ClassificationFieldV1, ListFieldV1
5-
from mindee.parsing.custom.line_items import CustomLineV1, get_line_items
4+
from mindee.parsing.custom import ClassificationField, ListField
5+
from mindee.parsing.custom.line_items import CustomLine, get_line_items
66

77

88
class CustomV1Document(Prediction):
99
"""Custom V1 document prediction results."""
1010

11-
fields: Dict[str, ListFieldV1]
11+
fields: Dict[str, ListField]
1212
"""Dictionary of all fields in the document"""
13-
classifications: Dict[str, ClassificationFieldV1]
13+
classifications: Dict[str, ClassificationField]
1414
"""Dictionary of all classifications in the document"""
1515

1616
def __init__(self, raw_prediction: StringDict) -> None:
@@ -23,17 +23,17 @@ def __init__(self, raw_prediction: StringDict) -> None:
2323
self.classifications = {}
2424
for field_name, field_contents in raw_prediction.items():
2525
if "value" in field_contents:
26-
self.classifications[field_name] = ClassificationFieldV1(field_contents)
26+
self.classifications[field_name] = ClassificationField(field_contents)
2727
# Only value lists have the 'values' attribute.
2828
elif "values" in field_contents:
29-
self.fields[field_name] = ListFieldV1(field_contents)
29+
self.fields[field_name] = ListField(field_contents)
3030

3131
def columns_to_line_items(
3232
self,
3333
anchor_names: List[str],
3434
field_names: List[str],
3535
height_tolerance: float = 0.01,
36-
) -> List[CustomLineV1]:
36+
) -> List[CustomLine]:
3737
"""
3838
Order column fields into line items.
3939

mindee/product/custom/custom_v1_page.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from typing import Dict, List, Optional
22

33
from mindee.parsing.common import Prediction, StringDict, clean_out_string
4-
from mindee.parsing.custom import ListFieldV1
5-
from mindee.parsing.custom.line_items import CustomLineV1, get_line_items
4+
from mindee.parsing.custom import ListField
5+
from mindee.parsing.custom.line_items import CustomLine, get_line_items
66

77

88
class CustomV1Page(Prediction):
99
"""Custom V1 page prediction results."""
1010

11-
fields: Dict[str, ListFieldV1]
11+
fields: Dict[str, ListField]
1212
"""Dictionary of all fields in the document"""
1313

1414
def __init__(self, raw_prediction: StringDict, page_id: Optional[int]) -> None:
@@ -19,14 +19,14 @@ def __init__(self, raw_prediction: StringDict, page_id: Optional[int]) -> None:
1919
"""
2020
self.fields = {}
2121
for field_name, field_contents in raw_prediction.items():
22-
self.fields[field_name] = ListFieldV1(field_contents, page_id=page_id)
22+
self.fields[field_name] = ListField(field_contents, page_id=page_id)
2323

2424
def columns_to_line_items(
2525
self,
2626
anchor_names: List[str],
2727
field_names: List[str],
2828
height_tolerance: float = 0.01,
29-
) -> List[CustomLineV1]:
29+
) -> List[CustomLine]:
3030
"""
3131
Order column fields into line items.
3232

0 commit comments

Comments
 (0)