Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ dist/main:
. .venv/bin/activate && python -m PyInstaller --onefile --hidden-import="googleapiclient" --add-data="./src:src" src/main.py

lint:
. .venv/bin/activate && pylint --disable=C0114,E0401,E1101,C0116,W0613,R0913,C0116,R0914,C0103,W0201,W0719 src/
. .venv/bin/activate && pylint --disable=C0114,E0401,E1101,C0116,W0613,R0913,C0116,R0914,C0103,W0201,W0719,R0902,R0912 src/

5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ The following attributes are available for `viam:vision:motion-detector` vision
| `min_box_percent` | float | **Optional** | The fraction of the image (between 0 and 1) that the smallest bounding box must cover. Relevant for GetDetections/GetDetectionsFromCamera only. You may specify at most one of `min_box_size` and `min_box_percent`. |
| `max_box_size` | int | **Optional** | The size (in square pixels) of the largest bounding box to allow. Relevant for GetDetections/GetDetectionsFromCamera only. You may specify at most one of `max_box_size` and `max_box_percent`. |
| `max_box_percent` | float | **Optional** | The fraction of the image (between 0 and 1) that the largest bounding box can cover. Relevant for GetDetections/GetDetectionsFromCamera only. You may specify at most one of `max_box_size` and `max_box_percent`. |
| `sensitivity` | float | **Optional** | A number from 0 - 1. Larger numbers will make the module more sensitive to motion. Default = 0.9 |
| `sensitivity` | float | **Optional** | A number between 0 and 1. Larger numbers make the module more sensitive to motion. Default = 0.9 |
| `crop_region` | dict | **Optional** | Defines a region of the image to crop before processing. Must include four float values between 0 and 1: `x1_rel` and `y1_rel` (top-left corner) and `x2_rel` and `y2_rel` (bottom-right corner), given as fractions of the image width (x) and height (y). `x1_rel` must be less than `x2_rel`, and `y1_rel` less than `y2_rel`. |



> [!WARNING]
> Either one of `camera_name` or `cam_name` will be accepted, but not both. `camera_name` is preferred.
Expand Down
138 changes: 105 additions & 33 deletions src/motion_detector.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
import math
from typing import ClassVar, List, Mapping, Sequence, Any, Dict, Optional
from typing_extensions import Self
from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence

import cv2
import numpy as np


import PIL
from typing_extensions import Self
from viam.components.camera import Camera
from viam.media.video import ViamImage, CameraMimeType
from viam.logging import getLogger
from viam.media.utils import pil
from viam.proto.service.vision import Classification, Detection
from viam.services.vision import Vision, CaptureAllResult
from viam.media.video import CameraMimeType, ViamImage
from viam.module.types import Reconfigurable
from viam.proto.app.robot import ServiceConfig
from viam.proto.common import PointCloudObject, ResourceName
from viam.proto.service.vision import Classification, Detection
from viam.resource.base import ResourceBase
from viam.resource.types import Model, ModelFamily
from viam.services.vision import CaptureAllResult, Vision
from viam.utils import ValueTypes
from viam.logging import getLogger




LOGGER = getLogger("MotionDetectorLogger")

Expand Down Expand Up @@ -54,45 +51,81 @@ def new_service(

# Validates JSON Configuration
@classmethod
def validate_config(cls, config: ServiceConfig) -> Sequence[str]:
    """Validate the service attributes and return the source camera name.

    Checks performed, in order:

    * exactly one of ``cam_name`` / ``camera_name`` is a non-empty string;
    * ``min_box_size`` is non-negative, ``min_box_percent`` is within
      [0, 1], and at most one of the two is non-zero;
    * ``sensitivity`` is within [0, 1];
    * ``max_box_size`` is non-negative, ``max_box_percent`` is within
      [0, 1], and at most one of the two is non-zero;
    * when ``crop_region`` is present (non-empty), it carries ``x1_rel``,
      ``y1_rel``, ``x2_rel`` and ``y2_rel``, each within [0, 1], with
      ``x1_rel < x2_rel`` and ``y1_rel < y2_rel``.

    Args:
        config: Service configuration whose ``attributes.fields`` mapping
            holds the attribute values.

    Returns:
        A single-element list holding the source camera name.

    Raises:
        ValueError: If any of the checks above fails.
    """
    fields = config.attributes.fields

    validate_cam_name = fields["cam_name"].string_value
    validate_camera_name = fields["camera_name"].string_value
    if validate_cam_name == "" and validate_camera_name == "":
        raise ValueError(
            "Source camera must be provided as 'cam_name' or 'camera_name', "
            "but neither was provided"
        )
    if validate_cam_name != "" and validate_camera_name != "":
        raise ValueError(
            "Source camera must be provided as 'cam_name' or 'camera_name', "
            "but both were provided"
        )
    source_cam = (
        validate_cam_name if validate_cam_name != "" else validate_camera_name
    )

    min_box_size = fields["min_box_size"].number_value
    min_box_percent = fields["min_box_percent"].number_value
    if min_box_size < 0:
        raise ValueError(
            "Minimum bounding box size should be a non-negative integer"
        )
    if min_box_percent < 0.0 or min_box_percent > 1.0:
        raise ValueError(
            "Minimum bounding box percent should be between 0.0 and 1.0"
        )
    if min_box_size != 0 and min_box_percent != 0.0:
        raise ValueError(
            "Cannot specify the minimum box in both pixels and percentages"
        )

    sensitivity = fields["sensitivity"].number_value
    if sensitivity < 0 or sensitivity > 1:
        raise ValueError("Sensitivity should be a number between 0.0 and 1.0")

    max_box_size = fields["max_box_size"].number_value
    max_box_percent = fields["max_box_percent"].number_value
    if max_box_size < 0:
        raise ValueError(
            "Maximum bounding box size should be a non-negative integer"
        )
    if max_box_percent < 0.0 or max_box_percent > 1.0:
        raise ValueError(
            "Maximum bounding box percent should be between 0.0 and 1.0"
        )
    if max_box_size != 0 and max_box_percent != 0.0:
        raise ValueError(
            "Cannot specify the maximum box in both pixels and percentages"
        )

    # Crop region is optional; an absent (or empty) struct skips these checks.
    crop_struct = fields["crop_region"].struct_value
    if crop_struct:
        crop_region = dict(crop_struct.fields)

        # Report missing keys as a configuration error rather than letting a
        # bare KeyError escape from the lookups below.
        required = ("x1_rel", "x2_rel", "y1_rel", "y2_rel")
        missing = [key for key in required if key not in crop_region]
        if missing:
            raise ValueError(
                "crop_region is missing required value(s): " + ", ".join(missing)
            )

        corners = {
            key: float(crop_region[key].number_value) for key in required
        }
        for key in required:
            if corners[key] < 0.0 or corners[key] > 1.0:
                raise ValueError(f"{key} should be between 0.0 and 1.0")

        # Both comparisons are strict: an equal pair describes a zero-width
        # or zero-height region, which leaves nothing to process.
        if corners["x1_rel"] >= corners["x2_rel"]:
            raise ValueError("x1_rel should be less than x2_rel")
        if corners["y1_rel"] >= corners["y2_rel"]:
            raise ValueError("y1_rel should be less than y2_rel")

    return [source_cam]

# Handles attribute reconfiguration
Expand All @@ -115,6 +148,18 @@ def reconfigure(
self.max_box_size = config.attributes.fields["max_box_size"].number_value
self.max_box_percent = config.attributes.fields["max_box_percent"].number_value

# Crop region is optional, so we need to check if it exists
if config.attributes.fields["crop_region"].struct_value:
self.crop_region = dict(
config.attributes.fields["crop_region"].struct_value.fields
)
self.crop_region["x1_rel"] = float(self.crop_region["x1_rel"].number_value)
self.crop_region["y1_rel"] = float(self.crop_region["y1_rel"].number_value)
self.crop_region["x2_rel"] = float(self.crop_region["x2_rel"].number_value)
self.crop_region["y2_rel"] = float(self.crop_region["y2_rel"].number_value)
else:
self.crop_region = None

# This will be the main method implemented in this module.
# Given a camera. Perform frame differencing and return how much of the image is moving
async def get_classifications(
Expand All @@ -133,6 +178,7 @@ async def get_classifications(
"image mime type must be PNG or JPEG, not ", input1.mime_type
)
img1 = pil.viam_to_pil_image(input1)
img1, _, _ = self.crop_image(img1)
gray1 = cv2.cvtColor(np.array(img1), cv2.COLOR_BGR2GRAY)

input2 = await self.camera.get_image()
Expand All @@ -141,6 +187,7 @@ async def get_classifications(
"image mime type must be PNG or JPEG, not ", input2.mime_type
)
img2 = pil.viam_to_pil_image(input2)
img2, _, _ = self.crop_image(img2)
gray2 = cv2.cvtColor(np.array(img2), cv2.COLOR_BGR2GRAY)

return self.classification_from_gray_imgs(gray1=gray1, gray2=gray2)
Expand All @@ -163,8 +210,7 @@ async def get_classifications_from_camera(
"is not the configured 'cam_name'",
self.cam_name,
)
image = await self.camera.get_image()
return await self.get_classifications(image=image, count=count)
return await self.get_classifications(image=None, count=count)

# Not implemented for now. Eventually want this to return the location of the movement
async def get_detections(
Expand All @@ -182,6 +228,7 @@ async def get_detections(
"image mime type must be PNG or JPEG, not ", input1.mime_type
)
img1 = pil.viam_to_pil_image(input1)
img1, width, height = self.crop_image(img1)
gray1 = cv2.cvtColor(np.array(img1), cv2.COLOR_BGR2GRAY)

input2 = await self.camera.get_image()
Expand All @@ -190,9 +237,9 @@ async def get_detections(
"image mime type must be PNG or JPEG, not ", input2.mime_type
)
img2 = pil.viam_to_pil_image(input2)
img2, width, height = self.crop_image(img2)
gray2 = cv2.cvtColor(np.array(img2), cv2.COLOR_BGR2GRAY)

return self.detections_from_gray_imgs(gray1, gray2)
return self.detections_from_gray_imgs(gray1, gray2, width, height)

async def get_detections_from_camera(
self,
Expand Down Expand Up @@ -291,7 +338,7 @@ def classification_from_gray_imgs(self, gray1, gray2):
classifications = [{"class_name": "motion", "confidence": conf}]
return classifications

def detections_from_gray_imgs(self, gray1, gray2):
def detections_from_gray_imgs(self, gray1, gray2, width=None, height=None):
detections = []
# Frame difference
diff = cv2.absdiff(gray2, gray1)
Expand All @@ -314,7 +361,9 @@ def detections_from_gray_imgs(self, gray1, gray2):
img_out = cv2.erode(img3, kernel2)

# List points around the remaining blobs
contours, _ = cv2.findContours(img_out, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours, _ = cv2.findContours(
img_out, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)

# Make boxes from the contours
for c in contours:
Expand All @@ -335,6 +384,17 @@ def detections_from_gray_imgs(self, gray1, gray2):
if self.max_box_percent > 0 and area_percent > self.max_box_percent:
continue

if self.crop_region:
# Adjust coordinates based on crop region
x_offset = int(self.crop_region.get("x1_rel") * width)
y_offset = int(self.crop_region.get("y1_rel") * height)

# Convert back to original image coordinates
xmin = min(width - 1, xmin + x_offset)
ymin = min(height - 1, ymin + y_offset)
xmax = min(width - 1, xmax + x_offset)
ymax = min(height - 1, ymax + y_offset)

detection = {
"confidence": 0.5,
"class_name": "motion",
Expand All @@ -353,7 +413,19 @@ def detections_from_gray_imgs(self, gray1, gray2):
"y_max_normalized": ymax / diff.shape[0],
}
)

detections.append(detection)

return detections

def crop_image(self, image: PIL.Image.Image):
    """Crop ``image`` to the configured relative crop region.

    Returns a 3-tuple ``(image, width, height)``. With no crop region
    configured, the input image is returned untouched and both dimensions
    are ``None``. Otherwise the cropped image is returned together with
    the width and height of the *uncropped* input.
    """
    region = self.crop_region
    if region:
        full_width, full_height = image.size
        # Relative corners are scaled to pixels; int() truncates toward zero.
        box = (
            int(region["x1_rel"] * full_width),
            int(region["y1_rel"] * full_height),
            int(region["x2_rel"] * full_width),
            int(region["y2_rel"] * full_height),
        )
        return image.crop(box), full_width, full_height
    return image, None, None

def retrieve_original_coordinates(self, x_normalized, y_normalized, width, height):
    """Unimplemented placeholder: the body is a bare ``pass``, so it returns ``None``.

    NOTE(review): the name and parameters suggest this is meant to map
    normalized coordinates back onto the uncropped image -- confirm the
    intent, then implement or remove it.
    """
    pass
Loading