diff --git a/.gitignore b/.gitignore index 15b099b..7c712ba 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ wheels/ .venv .DS_Store + +draw.py diff --git a/assets/us_card.result.png b/assets/us_card.result.png index 3dd1437..6acdf5f 100644 --- a/assets/us_card.result.png +++ b/assets/us_card.result.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71bb92fbe6bc2b3441f80d81854f47d6f4fb34adac359163f84a44c697a0ebc7 -size 434789 +oid sha256:c8ef032bcfa41a8f09be5ea4453c45b34982705fb5de82964e5953df7efb7f37 +size 435867 diff --git a/pyproject.toml b/pyproject.toml index a4198f8..e17ef9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "microwink" -version = "0.0.1" -description = "Lightweight instance segmentation for card IDs" +version = "0.0.2" +description = "Lightweight instance segmentation of card IDs" readme = "README.md" license = { text = "Apache-2.0" } authors = [{ name = "cospectrum", email = "severinalexeyv@gmail.com" }] @@ -9,7 +9,6 @@ requires-python = ">=3.10" dependencies = [ "numpy>=2.2.0", "onnxruntime>=1.20.1", - "opencv-python>=4.10.0.84", "pillow>=11.0.0", ] diff --git a/src/microwink/common.py b/src/microwink/common.py index 46daffc..5589ed8 100644 --- a/src/microwink/common.py +++ b/src/microwink/common.py @@ -32,7 +32,7 @@ def draw_box( box: Box, *, color: tuple[int, ...] | str | float = (255, 0, 0), - width: int = 4, + width: int = 3, ) -> PILImage: image = image.copy() draw = ImageDraw.Draw(image) diff --git a/src/microwink/seg.py b/src/microwink/seg.py index 9f2fd4f..afa2a9c 100644 --- a/src/microwink/seg.py +++ b/src/microwink/seg.py @@ -1,11 +1,11 @@ -import math import os +import math import onnxruntime as ort # type: ignore # missing stubs import numpy as np -import cv2 as cv from typing import Sequence from dataclasses import dataclass +from PIL import Image from PIL.Image import Image as PILImage from . import common @@ -79,7 +79,9 @@ def from_session(session: ort.InferenceSession) -> "SegModel": ) def apply( - self, image: PILImage, threshold: Threshold = Threshold.default() + self, + image: PILImage, + threshold: Threshold = Threshold.default(), ) -> list[SegResult]: assert image.mode == "RGB" tensor = self.preprocess(image) @@ -104,7 +106,10 @@ def apply( return out def postprocess( - self, outs: list[np.ndarray], img_size: tuple[H, W], threshold: Threshold + self, + outs: list[np.ndarray], + img_size: tuple[H, W], + threshold: Threshold, ) -> Result | None: NUM_MASKS = 32 box_out, mask_out = outs @@ -130,7 +135,10 @@ def postprocess( final_boxes = boxes[indexes] final_scores = scores[indexes] final_mask_maps = self.postprocess_mask( - mask_preds[indexes], mask_out, final_boxes, img_size + mask_preds[indexes], + mask_out, + final_boxes, + img_size, ) assert len(final_boxes) == len(final_scores) == len(final_mask_maps) return Result( @@ -165,33 +173,30 @@ def postprocess_mask( iw, ) ) - blur_size = ( - int(iw / mask_width), - int(ih / mask_height), - ) - for i in range(len(scaled_boxes)): - scaled_box: np.ndarray = scaled_boxes[i] - box: np.ndarray = boxes[i] - scale_x1 = int(math.floor(scaled_box[0])) - scale_y1 = int(math.floor(scaled_box[1])) - scale_x2 = int(math.ceil(scaled_box[2])) - scale_y2 = int(math.ceil(scaled_box[3])) - - x1 = int(math.floor(box[0])) - y1 = int(math.floor(box[1])) - x2 = int(math.ceil(box[2])) - y2 = int(math.ceil(box[3])) - - mask: np.ndarray = masks[i] + assert len(scaled_boxes) == len(masks) + assert len(scaled_boxes) == len(boxes) + for i, (box, scaled_box, mask) in enumerate(zip(boxes, scaled_boxes, masks)): assert 2 == len(mask.shape) - final_mask = cv.resize( + + scale_x1 = math.floor(scaled_box[0]) + scale_y1 = math.floor(scaled_box[1]) + scale_x2 = math.ceil(scaled_box[2]) + scale_y2 = math.ceil(scaled_box[3]) + + x1 = math.floor(box[0]) + y1 = math.floor(box[1]) + x2 = math.ceil(box[2]) + y2 = math.ceil(box[3]) + + ow, oh = (x2 - x1, y2 - y1) + assert ow >= 0 + assert oh >= 0 + resized_mask = resize( mask[scale_y1:scale_y2, scale_x1:scale_x2], - (x2 - x1, y2 - y1), - interpolation=cv.INTER_CUBIC, + (ow, oh), ) - final_mask = cv.blur(final_mask, blur_size) # -> [-inf, +inf] - final_mask = common.sigmoid(final_mask).clip(0.0, 1.0) - mask_maps[i, y1:y2, x1:x2] = final_mask + assert resized_mask.shape == (oh, ow) + mask_maps[i, y1:y2, x1:x2] = common.sigmoid(resized_mask).clip(0.0, 1.0) return mask_maps @@ -204,7 +209,8 @@ def forward(self, tensor: np.ndarray) -> list[np.ndarray]: def preprocess(self, image: PILImage) -> np.ndarray: size = (self.input_shape.w, self.input_shape.h) - image = image.resize(size) + if image.size != size: + image = image.resize(size) img = np.array(image).astype(np.float32) assert len(img.shape) == 3 img /= 255.0 @@ -295,3 +301,11 @@ def compute_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray: iou = intersection_area / union_area return iou + + +def resize(buf: np.ndarray, size: tuple[W, H]) -> np.ndarray: + img = Image.fromarray(buf).resize(size) + out = np.array(img) + assert out.dtype == buf.dtype + assert len(out.shape) == len(buf.shape) + return out diff --git a/tests/truth/seg_model/mklovin.png b/tests/truth/seg_model/mklovin.png index bae2345..1f8fa36 100644 --- a/tests/truth/seg_model/mklovin.png +++ b/tests/truth/seg_model/mklovin.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14c21f1b869065c4406c820d990fd372b4116a8a50cb649f53ad0dfc13d7bc87 -size 1147208 +oid sha256:eacb855f217f35c5b42a7f6517191fbc390c1f3ec1939cf142617866898b8f71 +size 1147223 diff --git a/tests/truth/seg_model/us_card.png b/tests/truth/seg_model/us_card.png index cb0fac2..266e0c9 100644 --- a/tests/truth/seg_model/us_card.png +++ b/tests/truth/seg_model/us_card.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a58bd52b3ac9ef35b971e30eb8711fca0196896e48bb80f34675a14f47bf3637 -size 438314 +oid sha256:2e0936b50f63dae06fc2936ba1b6b7d18506676be7266311144bec6229ece606 +size 438449 diff --git a/uv.lock b/uv.lock index 41c1600..159c2d7 100644 --- a/uv.lock +++ b/uv.lock @@ -74,12 +74,11 @@ wheels = [ [[package]] name = "microwink" -version = "0.0.1" +version = "0.0.2" source = { editable = "." } dependencies = [ { name = "numpy" }, { name = "onnxruntime" }, - { name = "opencv-python" }, { name = "pillow" }, ] @@ -94,7 +93,6 @@ dev = [ requires-dist = [ { name = "numpy", specifier = ">=2.2.0" }, { name = "onnxruntime", specifier = ">=1.20.1" }, - { name = "opencv-python", specifier = ">=4.10.0.84" }, { name = "pillow", specifier = ">=11.0.0" }, ] @@ -255,23 +253,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 }, ] -[[package]] -name = "opencv-python" -version = "4.10.0.84" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/b70a2d9ab205110d715906fc8ec83fbb00404aeb3a37a0654fdb68eb0c8c/opencv-python-4.10.0.84.tar.gz", hash = "sha256:72d234e4582e9658ffea8e9cae5b63d488ad06994ef12d81dc303b17472f3526", size = 95103981 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/66/82/564168a349148298aca281e342551404ef5521f33fba17b388ead0a84dc5/opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fc182f8f4cda51b45f01c64e4cbedfc2f00aff799debebc305d8d0210c43f251", size = 54835524 }, - { url = "https://files.pythonhosted.org/packages/64/4a/016cda9ad7cf18c58ba074628a4eaae8aa55f3fd06a266398cef8831a5b9/opencv_python-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:71e575744f1d23f79741450254660442785f45a0797212852ee5199ef12eed98", size = 56475426 }, - { url = "https://files.pythonhosted.org/packages/81/e4/7a987ebecfe5ceaf32db413b67ff18eb3092c598408862fff4d7cc3fd19b/opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a332b50488e2dda866a6c5573ee192fe3583239fb26ff2f7f9ceb0bc119ea6", size = 41746971 }, - { url = "https://files.pythonhosted.org/packages/3f/a4/d2537f47fd7fcfba966bd806e3ec18e7ee1681056d4b0a9c8d983983e4d5/opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ace140fc6d647fbe1c692bcb2abce768973491222c067c131d80957c595b71f", size = 62548253 }, - { url = "https://files.pythonhosted.org/packages/1e/39/bbf57e7b9dab623e8773f6ff36385456b7ae7fa9357a5e53db732c347eac/opencv_python-4.10.0.84-cp37-abi3-win32.whl", hash = "sha256:2db02bb7e50b703f0a2d50c50ced72e95c574e1e5a0bb35a8a86d0b35c98c236", size = 28737688 }, - { url = "https://files.pythonhosted.org/packages/ec/6c/fab8113424af5049f85717e8e527ca3773299a3c6b02506e66436e19874f/opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe", size = 38842521 }, -] - [[package]] name = "packaging" version = "24.2"