Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔧 fix non-page-level id for fields #201

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/extras/guide/custom_v1.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Values of `ListField`s are stored in a `ListFieldValue` structure, which is impl
* **confidence** (`float`): the confidence score of the prediction
* **bounding_box** (`BBox`): relative coordinates of the 4 vertices of a rectangle containing the word in the document.
* **polygon** (`Polygon`): vertices of a polygon containing the word.
* **page_id** (`int`): the ID of the page, is `undefined` when at document-level.
* **page_id** (`Optional[int]`): the ID of the page the value was found on; `None` when the value is at document level.


### Classification Field
Expand Down
5 changes: 1 addition & 4 deletions mindee/parsing/common/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@ def __init__(
self.orientation = OrientationField(
raw_prediction["orientation"], page_id=self.id
)
try:
self.prediction = prediction_type(raw_prediction["prediction"], self.id)
except TypeError:
self.prediction = prediction_type(raw_prediction["prediction"])
self.prediction = prediction_type(raw_prediction["prediction"])

if "extras" in raw_prediction and raw_prediction["extras"]:
self.extras = Extras(raw_prediction["extras"])
Expand Down
19 changes: 6 additions & 13 deletions mindee/parsing/custom/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ class ListFieldValue(FieldPositionMixin):
page_id: Optional[int]
"""Id of the page the field was found on."""

def __init__(
self, raw_prediction: StringDict, page_id: Optional[int] = None
) -> None:
def __init__(self, raw_prediction: StringDict) -> None:
self.content = raw_prediction["content"]
self.confidence = raw_prediction["confidence"]
self.page_id = page_id
self.page_id = (
raw_prediction["page_id"] if "page_id" in raw_prediction else None
)
self._set_position(raw_prediction)

def __str__(self) -> str:
Expand All @@ -36,19 +36,12 @@ class ListField:
values: List[ListFieldValue]
"""List of word values"""

def __init__(
self,
raw_prediction: StringDict,
reconstructed: bool = False,
page_id: Optional[int] = None,
) -> None:
def __init__(self, raw_prediction: StringDict, reconstructed: bool = False) -> None:
self.values = []
self.reconstructed = reconstructed

for value in raw_prediction["values"]:
if "page_id" in value:
page_id = value["page_id"]
self.values.append(ListFieldValue(value, page_id))
self.values.append(ListFieldValue(value))
self.confidence = raw_prediction["confidence"]

@property
Expand Down
6 changes: 3 additions & 3 deletions mindee/product/custom/custom_v1_page.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, Optional
from typing import Dict, List

from mindee.parsing.common import Prediction, StringDict, clean_out_string
from mindee.parsing.custom import ListField
Expand All @@ -11,15 +11,15 @@ class CustomV1Page(Prediction):
fields: Dict[str, ListField]
"""Dictionary of all fields in the document"""

def __init__(self, raw_prediction: StringDict, page_id: Optional[int]) -> None:
def __init__(self, raw_prediction: StringDict) -> None:
"""
Custom document object.

:param raw_prediction: Dictionary containing the JSON document response
"""
self.fields = {}
for field_name, field_contents in raw_prediction.items():
self.fields[field_name] = ListField(field_contents, page_id=page_id)
self.fields[field_name] = ListField(field_contents)

def columns_to_line_items(
self,
Expand Down
Loading