diff --git a/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/manifest.json b/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/manifest.json
index 81b2f67..e5e7952 100644
--- a/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/manifest.json
+++ b/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/manifest.json
@@ -81,6 +81,23 @@
         "radar_points": "array",
         "annotated_image_base64": "string?"
       }
+    },
+    {
+      "id": "video_player_detection",
+      "title": "Video Player Detection",
+      "description": "Run player detection on every frame of a video.",
+      "input_types": ["video"],
+      "output_types": ["video_detections"],
+      "capabilities": ["player_detection"],
+      "inputs": {
+        "video_path": "string",
+        "device": "string",
+        "annotated": "boolean"
+      },
+      "outputs": {
+        "frames": "array",
+        "summary": "object"
+      }
     }
   ]
 }
diff --git a/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/plugin.py b/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/plugin.py
index 49f143d..487742f 100644
--- a/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/plugin.py
+++ b/plugins/forgesyte-yolo-tracker/src/forgesyte_yolo_tracker/plugin.py
@@ -207,6 +207,88 @@ def _tool_radar_video(video_path: str, output_path: str, device: str = "cpu") ->
     return {"status": "success", "output_path": output_path}
 
 
+# ---------------------------------------------------------
+# v0.9.5 Video Tool (JSON frame-level output)
+# ---------------------------------------------------------
+def _tool_video_player_detection(
+    video_path: str, device: str = "cpu", annotated: bool = False
+) -> Dict[str, Any]:
+    """Run player detection on video frames, returning JSON results.
+
+    Frames are consumed through YOLO streaming inference so the raw results
+    are not all held at once; the detections extracted from every frame are
+    still aggregated into a single in-memory response.
+
+    Args:
+        video_path: Path to input video file
+        device: Device to run model on ('cpu' or 'cuda')
+        annotated: Whether to include annotated frames (not implemented in
+            v0.9.5; the flag is currently ignored)
+
+    Returns:
+        Dict with 'frames' array and 'summary' object
+
+    Raises:
+        ValueError: If video_path is empty or None
+    """
+    # Fail fast: given no source, ultralytics falls back to its bundled sample
+    # images rather than raising, so results would describe the wrong input.
+    if not video_path:
+        raise ValueError("video_path is required")
+
+    from pathlib import Path
+
+    from forgesyte_yolo_tracker.configs import get_model_path
+
+    # Lazy import to avoid loading YOLO at module load time
+    from ultralytics import YOLO
+
+    # Construct model path
+    model_name = get_model_path("player_detection")
+    model_path = str(Path(__file__).parent / "models" / model_name)
+
+    # Load model and set device
+    model = YOLO(model_path).to(device=device)
+
+    frame_results: list = []
+    total_detections = 0
+
+    # Use YOLO streaming inference for memory efficiency
+    results = model(video_path, stream=True, verbose=False)
+
+    for frame_index, result in enumerate(results):
+        boxes = result.boxes
+
+        if boxes is None:
+            xyxy, confidence, class_id = [], [], []
+        else:
+            # tolist() on an empty array yields [], so frames without
+            # detections need no special casing.
+            xyxy = boxes.xyxy.cpu().numpy().tolist()
+            confidence = boxes.conf.cpu().numpy().tolist()
+            class_id = boxes.cls.cpu().numpy().tolist()
+
+        total_detections += len(xyxy)
+        frame_results.append(
+            {
+                "frame_index": frame_index,
+                "detections": {
+                    "xyxy": xyxy,
+                    "confidence": confidence,
+                    "class_id": class_id,
+                },
+            }
+        )
+
+    return {
+        "frames": frame_results,
+        "summary": {
+            "total_frames": len(frame_results),
+            "total_detections": total_detections,
+        },
+    }
+
+
 # ---------------------------------------------------------
 # Plugin class — FINAL, CORRECT, LOADER-COMPATIBLE
 # ---------------------------------------------------------
@@ -320,6 +402,20 @@ class Plugin(BasePlugin):  # type: ignore[misc]
             "output_schema": {"status": {"type": "string"}},
             "handler": _tool_radar_video,
         },
+        # v0.9.5 Video Tool (JSON frame-level output)
+        "video_player_detection": {
+            "description": "Run player detection on video frames, returning JSON results",
+            "input_schema": {
+                "video_path": {"type": "string"},
+                "device": {"type": "string", "default": "cpu"},
+                "annotated": {"type": "boolean", "default": False},
+            },
+            "output_schema": {
+                "frames": {"type": "array"},
+                "summary": {"type": "object"},
+            },
+            "handler": _tool_video_player_detection,
+        },
     }
 
     # -------------------------------------------------------
@@ -343,7 +439,15 @@ def run_tool(self, tool_name: str, args: Dict[str, Any]) -> Any:
 
         handler = self.tools[tool_name]["handler"]
 
-        # Video tools use different args
+        # v0.9.5 video tool (JSON frame-level output, no output_path)
+        if tool_name == "video_player_detection":
+            return handler(
+                video_path=args.get("video_path"),
+                device=args.get("device", "cpu"),
+                annotated=args.get("annotated", False),
+            )
+
+        # Legacy video tools (output annotated video files)
         if "video" in tool_name:
             return handler(
                 video_path=args.get("video_path"),
diff --git a/plugins/forgesyte-yolo-tracker/tests_contract/test_video_player_detection.py b/plugins/forgesyte-yolo-tracker/tests_contract/test_video_player_detection.py
new file mode 100644
index 0000000..d0117f7
--- /dev/null
+++ b/plugins/forgesyte-yolo-tracker/tests_contract/test_video_player_detection.py
@@ -0,0 +1,239 @@
+"""Unit tests for video_player_detection tool implementation.
+
+Tests the tool's input/output contract without loading YOLO models.
+Uses mocked YOLO inference results.
+"""
+
+from typing import Any, Dict, Generator, List
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+import pytest
+
+from forgesyte_yolo_tracker.plugin import Plugin
+
+# YOLO is lazy-imported inside _tool_video_player_detection, so we patch ultralytics.YOLO
+YOLO_PATCH_PATH = "ultralytics.YOLO"
+
+
+class MockDetectionResult:
+    """Mock YOLO detection result for a single frame."""
+
+    def __init__(self, boxes: List[Dict[str, Any]]):
+        self.boxes = MagicMock()
+        self.boxes.data = MagicMock()
+
+        # Simulate ultralytics Results.boxes.xyxy format; reshape(-1, 4) keeps
+        # the (0, 4) shape for a frame without detections.
+        xyxy = np.array([b["xyxy"] for b in boxes], dtype=np.float32).reshape(-1, 4)
+        conf = np.array([b["confidence"] for b in boxes], dtype=np.float32)
+        cls = np.array([b["class_id"] for b in boxes], dtype=np.int32)
+
+        self.boxes.xyxy.cpu.return_value.numpy.return_value = xyxy
+        self.boxes.conf.cpu.return_value.numpy.return_value = conf
+        self.boxes.cls.cpu.return_value.numpy.return_value = cls
+
+
+class MockYOLOModel:
+    """Mock YOLO model for testing."""
+
+    def __init__(self, frame_results: List[List[Dict[str, Any]]]):
+        """Initialize with pre-defined frame results.
+
+        Args:
+            frame_results: List of frame detection results, each frame is a list
+                of detection dicts with xyxy, confidence, class_id
+        """
+        self.frame_results = frame_results
+        self.call_count = 0
+        self.last_call_kwargs: Dict[str, Any] = {}
+
+    def __call__(
+        self, video_path: str, stream: bool = False, verbose: bool = False, **kwargs: Any
+    ) -> Generator[MockDetectionResult, None, None]:
+        """Simulate YOLO inference and record how it was invoked."""
+        # Recorded eagerly (not inside a generator body) so tests can inspect
+        # the call even if the returned iterator is never consumed.
+        self.call_count += 1
+        self.last_call_kwargs = {"stream": stream, "verbose": verbose, **kwargs}
+        return (MockDetectionResult(boxes) for boxes in self.frame_results)
+
+    def to(self, device: str) -> "MockYOLOModel":
+        """Simulate model.to() for device placement."""
+        return self
+
+
+class TestVideoPlayerDetectionContract:
+    """Tests for video_player_detection tool contract."""
+
+    def test_returns_dict_with_frames_and_summary(self) -> None:
+        """Verify tool returns dict with frames and summary keys."""
+        plugin = Plugin()
+
+        # Create mock model with 3 frames
+        mock_model = MockYOLOModel(
+            frame_results=[
+                [{"xyxy": [10, 10, 50, 50], "confidence": 0.9, "class_id": 0}],
+                [],
+                [
+                    {"xyxy": [20, 20, 60, 60], "confidence": 0.85, "class_id": 1},
+                    {"xyxy": [30, 30, 70, 70], "confidence": 0.8, "class_id": 2},
+                ],
+            ]
+        )
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            result = plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4", "device": "cpu"},
+            )
+
+        assert isinstance(result, dict)
+        assert "frames" in result
+        assert "summary" in result
+
+    def test_frames_have_correct_structure(self) -> None:
+        """Verify each frame has frame_index and detections."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(
+            frame_results=[
+                [{"xyxy": [10, 10, 50, 50], "confidence": 0.9, "class_id": 0}],
+                [],
+            ]
+        )
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            result = plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4", "device": "cpu"},
+            )
+
+        frames = result["frames"]
+        assert isinstance(frames, list)
+        assert len(frames) == 2
+
+        # First frame
+        assert frames[0]["frame_index"] == 0
+        assert "detections" in frames[0]
+        assert "xyxy" in frames[0]["detections"]
+        assert "confidence" in frames[0]["detections"]
+        assert "class_id" in frames[0]["detections"]
+
+        # Second frame
+        assert frames[1]["frame_index"] == 1
+
+    def test_summary_has_correct_structure(self) -> None:
+        """Verify summary has total_frames and total_detections."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(
+            frame_results=[
+                [{"xyxy": [10, 10, 50, 50], "confidence": 0.9, "class_id": 0}],
+                [],
+                [
+                    {"xyxy": [20, 20, 60, 60], "confidence": 0.85, "class_id": 1},
+                    {"xyxy": [30, 30, 70, 70], "confidence": 0.8, "class_id": 2},
+                ],
+            ]
+        )
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            result = plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4", "device": "cpu"},
+            )
+
+        summary = result["summary"]
+        assert isinstance(summary, dict)
+        assert "total_frames" in summary
+        assert "total_detections" in summary
+        assert summary["total_frames"] == 3
+        assert summary["total_detections"] == 3  # 1 + 0 + 2
+
+    def test_empty_video_returns_empty_frames(self) -> None:
+        """Verify tool handles empty video (no frames)."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(frame_results=[])
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            result = plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/empty.mp4", "device": "cpu"},
+            )
+
+        assert result["frames"] == []
+        assert result["summary"]["total_frames"] == 0
+        assert result["summary"]["total_detections"] == 0
+
+    def test_uses_stream_mode(self) -> None:
+        """Verify the tool requests streaming inference and consumes every frame."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(
+            frame_results=[
+                [{"xyxy": [10, 10, 50, 50], "confidence": 0.9, "class_id": 0}],
+                [{"xyxy": [20, 20, 60, 60], "confidence": 0.85, "class_id": 1}],
+            ]
+        )
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            result = plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4", "device": "cpu"},
+            )
+
+        # The model must be invoked exactly once, with stream=True
+        assert mock_model.call_count == 1
+        assert mock_model.last_call_kwargs["stream"] is True
+
+        # Verify all frames were processed (streaming yields each frame)
+        assert len(result["frames"]) == 2
+        assert result["summary"]["total_frames"] == 2
+
+    def test_empty_video_path_raises(self) -> None:
+        """Verify an empty video_path is rejected before any model is loaded."""
+        plugin = Plugin()
+        handler = plugin.tools["video_player_detection"]["handler"]
+
+        with patch(YOLO_PATCH_PATH) as yolo_cls:
+            with pytest.raises(ValueError):
+                handler(video_path="")
+
+        yolo_cls.assert_not_called()
+
+
+class TestVideoPlayerDetectionDevice:
+    """Tests for device parameter handling."""
+
+    def test_default_device_is_cpu(self) -> None:
+        """Verify device defaults to 'cpu'."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(frame_results=[[]])
+        mock_model.to = MagicMock(return_value=mock_model)
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4"},  # No device specified
+            )
+
+        # Verify to() was called with 'cpu'
+        mock_model.to.assert_called_with(device="cpu")
+
+    def test_cuda_device_passed_to_model(self) -> None:
+        """Verify device='cuda' is passed to model."""
+        plugin = Plugin()
+
+        mock_model = MockYOLOModel(frame_results=[[]])
+        mock_model.to = MagicMock(return_value=mock_model)
+
+        with patch(YOLO_PATCH_PATH, return_value=mock_model):
+            plugin.run_tool(
+                "video_player_detection",
+                {"video_path": "/tmp/test.mp4", "device": "cuda"},
+            )
+
+        mock_model.to.assert_called_with(device="cuda")
diff --git a/plugins/forgesyte-yolo-tracker/tests_contract/test_video_tool_contract.py b/plugins/forgesyte-yolo-tracker/tests_contract/test_video_tool_contract.py
new file mode 100644
index 0000000..a0a6ffb
--- /dev/null
+++ b/plugins/forgesyte-yolo-tracker/tests_contract/test_video_tool_contract.py
@@ -0,0 +1,131 @@
+"""Contract tests for video_player_detection tool manifest.
+
+Tests that the manifest declares a video-capable tool with correct schema.
+These tests validate the manifest structure without loading YOLO models.
+"""
+
+import json
+from pathlib import Path
+
+import pytest
+
+from forgesyte_yolo_tracker.plugin import Plugin
+
+
+class TestVideoToolManifest:
+    """Tests for video_player_detection tool in manifest.json."""
+
+    @pytest.fixture
+    def manifest(self) -> dict:
+        """Load the manifest.json located next to the package's __init__."""
+        import forgesyte_yolo_tracker
+
+        # pathlib instead of splitting on "/" so the lookup also works on Windows
+        manifest_path = Path(forgesyte_yolo_tracker.__file__).with_name("manifest.json")
+        with manifest_path.open(encoding="utf-8") as f:
+            return json.load(f)
+
+    def test_manifest_has_video_tool(self, manifest: dict) -> None:
+        """Verify manifest has a tool with input_types containing 'video'."""
+        tools = manifest.get("tools", [])
+        video_tools = [t for t in tools if "video" in t.get("input_types", [])]
+
+        assert len(video_tools) > 0, "Manifest should have at least one video tool"
+
+    def test_video_tool_id_is_video_player_detection(self, manifest: dict) -> None:
+        """Verify video tool has correct id."""
+        tools = manifest.get("tools", [])
+        video_tools = [t for t in tools if "video" in t.get("input_types", [])]
+
+        tool_ids = [t["id"] for t in video_tools]
+        assert "video_player_detection" in tool_ids, (
+            f"Expected video_player_detection tool, found: {tool_ids}"
+        )
+
+    def test_video_tool_input_schema(self, manifest: dict) -> None:
+        """Verify video_player_detection has correct input schema."""
+        tools = manifest.get("tools", [])
+        video_tool = next(
+            (t for t in tools if t.get("id") == "video_player_detection"), None
+        )
+
+        assert video_tool is not None, "video_player_detection tool not found"
+
+        inputs = video_tool.get("inputs", {})
+
+        # Required: video_path
+        assert "video_path" in inputs, "video_player_detection must accept video_path"
+
+        # Optional: device (should default to 'cpu')
+        assert "device" in inputs, "video_player_detection should accept device parameter"
+
+    def test_video_tool_output_schema(self, manifest: dict) -> None:
+        """Verify video_player_detection has correct output schema."""
+        tools = manifest.get("tools", [])
+        video_tool = next(
+            (t for t in tools if t.get("id") == "video_player_detection"), None
+        )
+
+        assert video_tool is not None, "video_player_detection tool not found"
+
+        outputs = video_tool.get("outputs", {})
+
+        # Must return frames array and summary object
+        assert "frames" in outputs, "video_player_detection must output frames"
+        assert "summary" in outputs, "video_player_detection must output summary"
+
+    def test_video_tool_input_types(self, manifest: dict) -> None:
+        """Verify video_player_detection has input_types: ['video']."""
+        tools = manifest.get("tools", [])
+        video_tool = next(
+            (t for t in tools if t.get("id") == "video_player_detection"), None
+        )
+
+        assert video_tool is not None, "video_player_detection tool not found"
+
+        input_types = video_tool.get("input_types", [])
+        assert "video" in input_types, (
+            f"video_player_detection input_types must contain 'video', got: {input_types}"
+        )
+
+
+class TestVideoToolPluginRegistration:
+    """Tests for video_player_detection tool registration in Plugin class."""
+
+    def test_plugin_has_video_player_detection_tool(self) -> None:
+        """Verify Plugin class has video_player_detection registered."""
+        plugin = Plugin()
+
+        assert "video_player_detection" in plugin.tools, (
+            "video_player_detection not found in plugin.tools"
+        )
+
+    def test_video_tool_has_handler(self) -> None:
+        """Verify video_player_detection tool has callable handler."""
+        plugin = Plugin()
+        tool = plugin.tools.get("video_player_detection", {})
+
+        assert "handler" in tool, "video_player_detection missing handler"
+        assert callable(tool["handler"]), "video_player_detection handler must be callable"
+
+    def test_video_tool_input_schema_in_plugin(self) -> None:
+        """Verify video_player_detection has input_schema in Plugin.tools."""
+        plugin = Plugin()
+        tool = plugin.tools.get("video_player_detection", {})
+
+        assert "input_schema" in tool, "video_player_detection missing input_schema"
+
+        input_schema = tool["input_schema"]
+        assert "video_path" in input_schema, "input_schema must have video_path"
+        assert "device" in input_schema, "input_schema should have device"
+
+    def test_video_tool_output_schema_in_plugin(self) -> None:
+        """Verify video_player_detection has output_schema in Plugin.tools."""
+        plugin = Plugin()
+        tool = plugin.tools.get("video_player_detection", {})
+
+        assert "output_schema" in tool, "video_player_detection missing output_schema"
+
+        output_schema = tool["output_schema"]
+        assert "frames" in output_schema, "output_schema must have frames"
+        assert "summary" in output_schema, "output_schema must have summary"