diff --git a/CHANGELOG.md b/CHANGELOG.md index aae973d8ed..8c88a317e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,4 @@ -## 0.18.27-dev5 - -### Enhancement -- Upgrade pdfminer-six to 20260107 to fix ~15-18% performance regression from eager f-string evaluation - -## 0.18.27-dev4 +## 0.18.27-dev6 ### Fixes - Comment no-ops in `zoom_image` (codeflash) @@ -13,6 +8,8 @@ - Optimize `_PartitionerLoader._load_partitioner` (codeflash) - Optimize `detect_languages` (codeflash) - Optimize `contains_verb` (codeflash) +- Optimize `get_bbox_thickness` (codeflash) +- Upgrade pdfminer-six to 20260107 to fix ~15-18% performance regression from eager f-string evaluation ## 0.18.26 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index c72d9a6020..c0da63dd46 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.18.27-dev5" # pragma: no cover +__version__ = "0.18.27-dev6" # pragma: no cover diff --git a/unstructured/partition/pdf_image/analysis/bbox_visualisation.py b/unstructured/partition/pdf_image/analysis/bbox_visualisation.py index 4de4828122..197a07f331 100644 --- a/unstructured/partition/pdf_image/analysis/bbox_visualisation.py +++ b/unstructured/partition/pdf_image/analysis/bbox_visualisation.py @@ -117,8 +117,9 @@ def _get_optimal_value_for_bbox( The optimal value for the given bounding box and parameters given. """ bbox_to_page_ratio = _get_bbox_to_page_ratio(bbox, page_size) - coefficients = np.polyfit((ratio_for_min_value, ratio_for_max_value), (min_value, max_value), 1) - value = int(bbox_to_page_ratio * coefficients[0] + coefficients[1]) + # Direct linear interpolation instead of np.polyfit for better performance + slope = (max_value - min_value) / (ratio_for_max_value - ratio_for_min_value) + value = int(min_value + slope * (bbox_to_page_ratio - ratio_for_min_value)) return max(min_value, min(max_value, value))