viam-modules · Rob1in · Apr 25, 2025 · Apr 21, 2025 · Apr 23, 2025 · Apr 23, 2025
diff --git a/Makefile b/Makefile
@@ -17,5 +17,5 @@ dist/main:
 	. .venv/bin/activate && python -m PyInstaller --onefile --hidden-import="googleapiclient" --add-data="./src:src" src/main.py
 
 lint:
-	. .venv/bin/activate && pylint --disable=C0114,E0401,E1101,C0116,W0613,R0913,C0116,R0914,C0103,W0201,W0719 src/
+	. .venv/bin/activate && pylint --disable=C0114,E0401,E1101,C0116,W0613,R0913,C0116,R0914,C0103,W0201,W0719,R0902,R0912 src/
 
diff --git a/README.md b/README.md
@@ -44,7 +44,10 @@ The following attributes are available for `viam:vision:motion-detector` vision
 | `min_box_percent` | int | **Optional** | The fraction of the image (between 0 and 1) that the smallest bounding box must cover. Relevant for GetDetections/GetDetectionsFromCamera only. You must specify at most one of `min_box_size` and `min_box_percent`.
 | `max_box_size` | int | **Optional** | The size (in square pixels) of the largest bounding box to allow. Relevant for GetDetections/GetDetectionsFromCamera only. You must specify at most one of `max_box_size` and `max_box_percent`.
 | `max_box_percent` | int | **Optional** | The fraction of the image (between 0 and 1) that the largest bounding box can cover. Relevant for GetDetections/GetDetectionsFromCamera only. You must specify at most one of `max_box_size` and `max_box_percent`.
-| `sensitivity` | float | **Optional** | A number from 0 - 1. Larger numbers will make the module more sensitive to motion. Default = 0.9 |
+| `sensitivity` | float | **Optional** | A number from 0 to 1. Larger values make the module more sensitive to motion. Default: 0.9. |
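+| `crop_region` | dict | **Optional** | Defines the region of the image to crop before processing. Must contain four float values between 0 and 1: `x1_rel`, `y1_rel` (top-left corner) and `x2_rel`, `y2_rel` (bottom-right corner), given as fractions of the image width and height. `x1_rel` must be less than `x2_rel`, and `y1_rel` should be less than `y2_rel`. |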
+
+
 
 > [!WARNING]  
 > Either one of `camera_name` or `cam_name` will be accepted, but not both. `camera_name` is preferred.

diff --git a/src/motion_detector.py b/src/motion_detector.py
@@ -1,25 +1,22 @@
 import math
-from typing import ClassVar, List, Mapping, Sequence, Any, Dict, Optional
-from typing_extensions import Self
+from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence
+
 import cv2
 import numpy as np
-
-
+import PIL
+from typing_extensions import Self
 from viam.components.camera import Camera
-from viam.media.video import ViamImage, CameraMimeType
+from viam.logging import getLogger
 from viam.media.utils import pil
-from viam.proto.service.vision import Classification, Detection
-from viam.services.vision import Vision, CaptureAllResult
+from viam.media.video import CameraMimeType, ViamImage
 from viam.module.types import Reconfigurable
 from viam.proto.app.robot import ServiceConfig
 from viam.proto.common import PointCloudObject, ResourceName
+from viam.proto.service.vision import Classification, Detection
 from viam.resource.base import ResourceBase
 from viam.resource.types import Model, ModelFamily
+from viam.services.vision import CaptureAllResult, Vision
 from viam.utils import ValueTypes
-from viam.logging import getLogger
-
-
-
 
 LOGGER = getLogger("MotionDetectorLogger")
 
@@ -54,45 +51,81 @@ def new_service(
 
     # Validates JSON Configuration
     @classmethod
-    def validate_config(
-        cls,
-        config: ServiceConfig
-    ) -> Sequence[str]:
+    def validate_config(cls, config: ServiceConfig) -> Sequence[str]:
+        """Validate the attributes and return the source camera name in a list.
+
+        Raises ValueError on a bad camera, box size, sensitivity or crop_region.
+        """
         validate_cam_name = config.attributes.fields["cam_name"].string_value
         validate_camera_name = config.attributes.fields["camera_name"].string_value
 
         if validate_cam_name == "" and validate_camera_name == "":
             raise ValueError(
                 "Source camera must be provided as 'cam_name' or 'camera_name', "
-                "but neither was provided")
+                "but neither was provided"
+            )
         if validate_cam_name != "" and validate_camera_name != "":
             raise ValueError(
                 "Source camera must be provided as 'cam_name' or 'camera_name', "
-                "but both were provided")
-        source_cam = validate_cam_name if validate_cam_name != "" else validate_camera_name
+                "but both were provided"
+            )
+        source_cam = (
+            validate_cam_name if validate_cam_name != "" else validate_camera_name
+        )
 
-        min_box_size    = config.attributes.fields["min_box_size"].number_value
+        min_box_size = config.attributes.fields["min_box_size"].number_value
         min_box_percent = config.attributes.fields["min_box_percent"].number_value
         if min_box_size < 0:
-            raise ValueError("Minimum bounding box size should be a non-negative integer")
+            raise ValueError(
+                "Minimum bounding box size should be a non-negative integer"
+            )
         if min_box_percent < 0.0 or min_box_percent > 1.0:
-            raise ValueError("Minimum bounding box percent should be between 0.0 and 1.0")
+            raise ValueError(
+                "Minimum bounding box percent should be between 0.0 and 1.0"
+            )
         if min_box_size != 0 and min_box_percent != 0.0:
-            raise ValueError("Cannot specify the minimum box in both pixels and percentages")
+            raise ValueError(
+                "Cannot specify the minimum box in both pixels and percentages"
+            )
 
         sensitivity = config.attributes.fields["sensitivity"].number_value
         if sensitivity < 0 or sensitivity > 1:
             raise ValueError("Sensitivity should be a number between 0.0 and 1.0")
 
-        max_box_size    = config.attributes.fields["max_box_size"].number_value
+        max_box_size = config.attributes.fields["max_box_size"].number_value
         max_box_percent = config.attributes.fields["max_box_percent"].number_value
         if max_box_size < 0:
-            raise ValueError("Maximum bounding box size should be a non-negative integer")
+            raise ValueError(
+                "Maximum bounding box size should be a non-negative integer"
+            )
         if max_box_percent < 0.0 or max_box_percent > 1.0:
-            raise ValueError("Maximum bounding box percent should be between 0.0 and 1.0")
+            raise ValueError(
+                "Maximum bounding box percent should be between 0.0 and 1.0"
+            )
         if max_box_size != 0 and max_box_percent != 0.0:
-            raise ValueError("Cannot specify the maximum box in both pixels and percentages")
+            raise ValueError(
+                "Cannot specify the maximum box in both pixels and percentages"
+            )
 
+        if config.attributes.fields["crop_region"].struct_value:
+            crop_region = dict(
+                config.attributes.fields["crop_region"].struct_value.fields
+            )
+            # All four keys are required; report a missing one as a config
+            # error instead of letting a bare KeyError escape.
+            bounds = {}
+            for key in ("x1_rel", "x2_rel", "y1_rel", "y2_rel"):
+                if key not in crop_region:
+                    raise ValueError(f"crop_region is missing '{key}'")
+                bounds[key] = float(crop_region[key].number_value)
+                if bounds[key] < 0.0 or bounds[key] > 1.0:
+                    raise ValueError(f"{key} should be between 0.0 and 1.0")
+            # Strict ordering on both axes: equal bounds would describe an
+            # empty crop region.
+            if bounds["x1_rel"] >= bounds["x2_rel"]:
+                raise ValueError("x1_rel should be less than x2_rel")
+            if bounds["y1_rel"] >= bounds["y2_rel"]:
+                raise ValueError("y1_rel should be less than y2_rel")
         return [source_cam]
 
     # Handles attribute reconfiguration
@@ -115,6 +148,18 @@ def reconfigure(
         self.max_box_size = config.attributes.fields["max_box_size"].number_value
         self.max_box_percent = config.attributes.fields["max_box_percent"].number_value
 
+        # Crop region is optional, so we need to check if it exists
+        if config.attributes.fields["crop_region"].struct_value:
+            self.crop_region = dict(
+                config.attributes.fields["crop_region"].struct_value.fields
+            )
+            self.crop_region["x1_rel"] = float(self.crop_region["x1_rel"].number_value)
+            self.crop_region["y1_rel"] = float(self.crop_region["y1_rel"].number_value)
+            self.crop_region["x2_rel"] = float(self.crop_region["x2_rel"].number_value)
+            self.crop_region["y2_rel"] = float(self.crop_region["y2_rel"].number_value)
+        else:
+            self.crop_region = None
+
     # This will be the main method implemented in this module.
     # Given a camera. Perform frame differencing and return how much of the image is moving
     async def get_classifications(
@@ -133,6 +178,7 @@ async def get_classifications(
                 "image mime type must be PNG or JPEG, not ", input1.mime_type
             )
         img1 = pil.viam_to_pil_image(input1)
+        img1, _, _ = self.crop_image(img1)
         gray1 = cv2.cvtColor(np.array(img1), cv2.COLOR_BGR2GRAY)
 
         input2 = await self.camera.get_image()
@@ -141,6 +187,7 @@ async def get_classifications(
                 "image mime type must be PNG or JPEG, not ", input2.mime_type
             )
         img2 = pil.viam_to_pil_image(input2)
+        img2, _, _ = self.crop_image(img2)
         gray2 = cv2.cvtColor(np.array(img2), cv2.COLOR_BGR2GRAY)
 
         return self.classification_from_gray_imgs(gray1=gray1, gray2=gray2)
@@ -163,8 +210,7 @@ async def get_classifications_from_camera(
                 "is not the configured 'cam_name'",
                 self.cam_name,
             )
-        image = await self.camera.get_image()
-        return await self.get_classifications(image=image, count=count)
+        return await self.get_classifications(image=None, count=count)
 
     # Not implemented for now. Eventually want this to return the location of the movement
     async def get_detections(
@@ -182,6 +228,7 @@ async def get_detections(
                 "image mime type must be PNG or JPEG, not ", input1.mime_type
             )
         img1 = pil.viam_to_pil_image(input1)
+        img1, width, height = self.crop_image(img1)
         gray1 = cv2.cvtColor(np.array(img1), cv2.COLOR_BGR2GRAY)
 
         input2 = await self.camera.get_image()
@@ -190,9 +237,9 @@ async def get_detections(
                 "image mime type must be PNG or JPEG, not ", input2.mime_type
             )
         img2 = pil.viam_to_pil_image(input2)
+        img2, width, height = self.crop_image(img2)
         gray2 = cv2.cvtColor(np.array(img2), cv2.COLOR_BGR2GRAY)
-
-        return self.detections_from_gray_imgs(gray1, gray2)
+        return self.detections_from_gray_imgs(gray1, gray2, width, height)
 
     async def get_detections_from_camera(
         self,
@@ -291,7 +338,7 @@ def classification_from_gray_imgs(self, gray1, gray2):
         classifications = [{"class_name": "motion", "confidence": conf}]
         return classifications
 
-    def detections_from_gray_imgs(self, gray1, gray2):
+    def detections_from_gray_imgs(self, gray1, gray2, width=None, height=None):
         detections = []
         # Frame difference
         diff = cv2.absdiff(gray2, gray1)
@@ -314,7 +361,9 @@ def detections_from_gray_imgs(self, gray1, gray2):
         img_out = cv2.erode(img3, kernel2)
 
         # List points around the remaining blobs
-        contours, _ = cv2.findContours(img_out, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+        contours, _ = cv2.findContours(
+            img_out, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
+        )
 
         # Make boxes from the contours
         for c in contours:
@@ -335,6 +384,17 @@ def detections_from_gray_imgs(self, gray1, gray2):
             if self.max_box_percent > 0 and area_percent > self.max_box_percent:
                 continue
 
+            if self.crop_region:
+                # Adjust coordinates based on crop region
+                x_offset = int(self.crop_region.get("x1_rel") * width)
+                y_offset = int(self.crop_region.get("y1_rel") * height)
+
+                # Convert back to original image coordinates
+                xmin = min(width - 1, xmin + x_offset)
+                ymin = min(height - 1, ymin + y_offset)
+                xmax = min(width - 1, xmax + x_offset)
+                ymax = min(height - 1, ymax + y_offset)
+
             detection = {
                 "confidence": 0.5,
                 "class_name": "motion",
@@ -353,7 +413,19 @@ def detections_from_gray_imgs(self, gray1, gray2):
                         "y_max_normalized": ymax / diff.shape[0],
                     }
                 )
-
             detections.append(detection)
 
         return detections
+
+    def crop_image(self, image: PIL.Image.Image):
+        """Return (crop, original width, original height), or (image, None, None) if unset."""
+        if not self.crop_region:
+            return image, None, None
+        width, height = image.size
+        region = self.crop_region
+        left, top = int(region["x1_rel"] * width), int(region["y1_rel"] * height)
+        right, bottom = int(region["x2_rel"] * width), int(region["y2_rel"] * height)
+        return image.crop((left, top, right, bottom)), width, height
+
+    def retrieve_original_coordinates(self, x_normalized, y_normalized, width, height):
+        """Unimplemented placeholder: takes no action and returns None."""