nvidia-holoscan · tbirdso · Jan 30, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025
diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
@@ -82,6 +82,8 @@ add_holohub_application(qt_video_replayer DEPENDS OPERATORS qt_video npp_filter)
 
 add_holohub_application(realsense_visualizer DEPENDS OPERATORS realsense_camera)
 
+add_holohub_application(stereo_vision)
+
 add_holohub_application(tao_peoplenet)
 
 add_holohub_application(network_radar_pipeline DEPENDS

diff --git a/applications/stereo_vision/CMakeLists.txt b/applications/stereo_vision/CMakeLists.txt
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.20)
+project(stereo_vision_app)
+
+add_subdirectory(cpp)  # the application targets are defined in cpp/CMakeLists.txt
diff --git a/applications/stereo_vision/README.md b/applications/stereo_vision/README.md
@@ -0,0 +1,76 @@
+# Stereo Vision
+
+<p align="center">
+  <img src="./images/plants.gif" alt="Holoscan Stereo Vision">
+</p>
+
+## Overview
+
+A demo pipeline showcasing stereo disparity estimation and object detection.
+
+## Description
+
+This pipeline takes video from a stereo camera, estimates disparity using the ESS DNN, and performs
+object detection using YOLO. The disparity maps and bounding boxes are displayed through Holoviz.
+
+## Requirements
+
+This application requires a V4L2 stereo camera or a recorded stereo video as input. A sample video acquired from a StereoLabs ZED
+camera is downloaded by the `get_data_and_models.sh` script, which is run as part of building the application.
+A script for obtaining the calibration for StereoLabs cameras is also provided.
+
+### Camera Calibration
+
+The default calibration will work for the sample video. If using a StereoLabs camera, the calibration
+can be retrieved using `get_zed_calibration.py` and the device's serial number.
+
+```sh
+python3 get_zed_calibration.py -s [Serial Number]
+```
+
+### Input video
+
+For the input video stream, either use a V4L2 stereo camera, such as those produced by StereoLabs, or the recorded `stereo-plants.mp4` video. If using
+the recorded video, it can be played through a V4L2 loopback device as described [here](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/v4l2_camera#use-with-v4l2-loopback-devices).
+The `stereo-plants.mp4` video is provided [here](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_stereo_video) and is downloaded by `get_data_and_models.sh` when following the instructions in the <b>Build and Run Instructions</b> section.
+
+The source device in `stereo_vision.yaml` should be set to the V4L2 device that provides the
+video. The device can be found using `v4l2-ctl --list-devices`.
+
+
+## Models
+
+This demo requires the ESS DNN Stereo Disparity model, available from the NGC catalog, for disparity
+estimation and the YOLOv8 ONNX model for object detection. Both models are downloaded when you build the application.
+
+### ESS DNN
+
+The ESS engine files generated in this demo application are specific to TensorRT 8.6; make sure
+you build the devcontainer with a compatible `base_img` as shown in the <b>Build and Run Instructions</b> section.
+
+### YOLOv8
+
+For object detection, a YOLOv8 model from [Ultralytics](https://docs.ultralytics.com/models/yolov8/) is used
+and exported to ONNX with non-max suppression plugin as mentioned [here](https://github.com/triple-Mu/YOLOv8-TensorRT).
+
+## Build and Run Instructions
+
+To build this application and download the necessary videos and models, run:
+```sh
+./dev_container build --base_img nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu --img holohub:stereo_vision
+./dev_container launch --img holohub:stereo_vision
+source applications/stereo_vision/scripts/get_data_and_models.sh data/stereo_vision
+./run build stereo_vision
+```
+
+If you are using the recorded video as input, start the video playback outside the Docker container. The
+following commands load the `v4l2loopback` kernel module and stream the video to the `/dev/video3` device.
+```sh
+sudo modprobe v4l2loopback video_nr=3 max_buffers=4
+ffmpeg -stream_loop -1 -re -i data/stereo_vision/stereo-plants.mp4 -pix_fmt yuyv422 -f v4l2 /dev/video3
+```
+
+Return to the first terminal and run:
+```sh
+./run launch stereo_vision
+```
diff --git a/applications/stereo_vision/cpp/CMakeLists.txt b/applications/stereo_vision/cpp/CMakeLists.txt
@@ -0,0 +1,70 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+cmake_minimum_required(VERSION 3.20)
+project(stereo_depth CXX CUDA)
+
+# Holoscan SDK (2.4 or a compatible newer version), searched in the listed install prefixes.
+find_package(holoscan 2.4 REQUIRED CONFIG
+  PATHS
+    "/opt/nvidia/holoscan"
+    "/workspace/holoscan-sdk/install"
+)
+
+# Eigen 3.4.0 is fetched from the upstream release archive at configure time.
+include(FetchContent)
+FetchContent_Declare(Eigen3
+  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
+)
+FetchContent_MakeAvailable(Eigen3)
+
+# Application executable: entry point, custom operators and their CUDA kernels.
+add_executable(stereo_depth
+  main.cpp
+  undistort_rectify.cpp
+  split_video.cpp
+  heat_map.cpp
+  stereo_depth_kernels.cu
+  crop.cpp
+  ess_processor.cpp
+  tracking_postprocessor.cpp
+)
+
+# Holoscan core and built-in operators, the NPP image-processing libraries, and Eigen.
+target_link_libraries(stereo_depth
+  PRIVATE
+    holoscan::core
+    holoscan::ops::video_stream_replayer
+    holoscan::ops::holoviz
+    holoscan::ops::v4l2
+    holoscan::ops::format_converter
+    holoscan::ops::inference
+    holoscan::ops::inference_processor
+    CUDA::nppif
+    CUDA::nppidei
+    CUDA::nppicc
+    CUDA::nppial
+    Eigen3::Eigen
+)
+
+# Copy the application configuration next to the executable.
+add_custom_target(stereo_depth_yaml
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/stereo_vision.yaml" ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS "stereo_vision.yaml"
+  BYPRODUCTS "stereo_vision.yaml"
+)
+
+# Copy the stereo calibration from the HoloHub data directory next to the executable.
+add_custom_target(stereo_calibration_yaml
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different "${HOLOHUB_DATA_DIR}/stereo_vision/stereo_calibration.yaml" ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS "stereo_calibration.yaml"
+  BYPRODUCTS "stereo_calibration.yaml"
+)
+
+# Both YAML files are copied whenever the executable is built.
+add_dependencies(stereo_depth
+  stereo_depth_yaml
+  stereo_calibration_yaml
+)
diff --git a/applications/stereo_vision/cpp/crop.cpp b/applications/stereo_vision/cpp/crop.cpp
@@ -0,0 +1,100 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crop.h"
+#include <gxf/std/tensor.hpp>
+#if __has_include("gxf/std/dlpack_utils.hpp")
+#define GXF_HAS_DLPACK_SUPPORT 1
+#else
+#define GXF_HAS_DLPACK_SUPPORT 0
+// Holoscan 1.0 used GXF without DLPack so gxf_tensor.hpp was needed to add it
+#include <holoscan/core/gxf/gxf_tensor.hpp>
+#endif
+
+namespace holoscan::ops {
+
+/**
+ * @brief Declares the operator's ports and crop parameters.
+ *
+ * Registers one input port ("input"), one output port ("output") and the four
+ * integer parameters describing the crop rectangle. Every parameter defaults to 0.
+ *
+ * @param spec Operator specification the ports and parameters are registered on.
+ */
+void CropOp::setup(OperatorSpec& spec) {
+  // Ports.
+  spec.input<holoscan::gxf::Entity>("input");
+  spec.output<holoscan::gxf::Entity>("output");
+
+  // Top-left corner of the crop rectangle.
+  spec.param(x_,
+             "x",
+             "top left x",
+             "top left x coordinate",
+             0);
+  spec.param(y_,
+             "y",
+             "top left y",
+             "top left y coordinate",
+             0);
+
+  // Extent of the crop rectangle.
+  spec.param(width_,
+             "width",
+             "width",
+             "width",
+             0);
+  spec.param(height_,
+             "height",
+             "height",
+             "height",
+             0);
+}
+
+/**
+ * @brief Crops a rectangular region out of the single tensor received on "input".
+ *
+ * The first three dimensions of the input tensor are interpreted as
+ * (height, width, channels). The region starting at (x, y) with size
+ * (width, height) is copied device-to-device into a newly allocated CUDA buffer,
+ * which is wrapped in a GXF tensor and emitted on "output". The buffer is
+ * released with cudaFree() once the emitted tensor releases its memory.
+ *
+ * NOTE(review): the copy assumes the input rows are tightly packed
+ * (row pitch == width * channels * element size) - confirm against upstream operators.
+ *
+ * @param op_input  Input context; exactly one tensor is expected on port "input".
+ * @param op_output Output context; the cropped tensor is emitted on port "output".
+ * @param context   Execution context used to create the output GXF entity.
+ *
+ * @throws std::runtime_error if the input cannot be received, does not contain exactly
+ *         one tensor, has fewer than three dimensions, if the crop rectangle is invalid
+ *         or exceeds the image, or if a CUDA or GXF call fails.
+ */
+void CropOp::compute(InputContext& op_input, OutputContext& op_output, ExecutionContext& context) {
+  auto maybe_tensormap = op_input.receive<holoscan::TensorMap>("input");
+  if (!maybe_tensormap) { throw std::runtime_error("Failed to receive input tensor map"); }
+  const auto& tensormap = maybe_tensormap.value();
+
+  if (tensormap.size() != 1) { throw std::runtime_error("Expecting single tensor input"); }
+
+  const auto& tensor = tensormap.begin()->second;
+  const auto in_shape = tensor->shape();
+  if (in_shape.size() < 3) {
+    throw std::runtime_error("Expecting a tensor with (height, width, channels) dimensions");
+  }
+  const int orig_height = static_cast<int>(in_shape[0]);
+  const int orig_width = static_cast<int>(in_shape[1]);
+  const int nChannels = static_cast<int>(in_shape[2]);
+
+  // Need to create a GXF tensor to access data type. Is there a better way?
+  nvidia::gxf::Tensor tensor_gxf(tensor->dl_ctx());
+  const nvidia::gxf::PrimitiveType data_type = tensor_gxf.element_type();
+  const size_t element_size = nvidia::gxf::PrimitiveTypeSize(data_type);
+
+  // Read the parameters once so that all checks and size computations use the same values.
+  const int x = x_.get();
+  const int y = y_.get();
+  const int width = width_.get();
+  const int height = height_.get();
+
+  if (x < 0 || y < 0 || width <= 0 || height <= 0) {
+    throw std::runtime_error("Invalid crop dimensions");
+  }
+
+  // Written as a subtraction so that large parameter values cannot overflow `x + width`.
+  if (x > orig_width - width || y > orig_height - height) {
+    HOLOSCAN_LOG_ERROR(
+        "Crop region (x={}, y={}, width={}, height={}) exceeds input of {}x{} with {} channels",
+        x, y, width, height, orig_width, orig_height, nChannels);
+    throw std::runtime_error("Crop exceeds image boundaries");
+  }
+
+  // All byte counts are computed in size_t to avoid int overflow on large images.
+  const size_t pixel_bytes = element_size * static_cast<size_t>(nChannels);
+  const size_t src_pitch = static_cast<size_t>(orig_width) * pixel_bytes;
+  const size_t dst_pitch = static_cast<size_t>(width) * pixel_bytes;
+
+  // The slot is initialised to nullptr so the deleter never passes an indeterminate
+  // pointer to cudaFree() when the allocation below fails.
+  auto pointer = std::shared_ptr<void*>(new void*(nullptr), [](void** pointer) {
+    if (pointer != nullptr) {
+      if (*pointer != nullptr) { cudaFree(*pointer); }
+      delete pointer;
+    }
+  });
+  cudaError_t status = cudaMalloc(pointer.get(), dst_pitch * static_cast<size_t>(height));
+  if (status != cudaSuccess) {
+    throw std::runtime_error(std::string("cudaMalloc failed: ") + cudaGetErrorString(status));
+  }
+
+  // First byte of the crop region: skip `y` full rows, then `x` pixels within that row.
+  const size_t src_offset =
+      static_cast<size_t>(y) * src_pitch + static_cast<size_t>(x) * pixel_bytes;
+  const char* src = static_cast<const char*>(tensor->data()) + src_offset;
+
+  status = cudaMemcpy2D(*pointer,
+                        dst_pitch,
+                        src,
+                        src_pitch,
+                        dst_pitch,
+                        static_cast<size_t>(height),
+                        cudaMemcpyDeviceToDevice);
+  if (status != cudaSuccess) {
+    throw std::runtime_error(std::string("cudaMemcpy2D failed: ") + cudaGetErrorString(status));
+  }
+
+  auto out_message = nvidia::gxf::Entity::New(context.context());
+  if (!out_message) { throw std::runtime_error("Failed to create output entity"); }
+
+  auto gxf_tensor = out_message.value().add<nvidia::gxf::Tensor>("");
+  if (!gxf_tensor) { throw std::runtime_error("Failed to add tensor to output entity"); }
+
+  // The output keeps the element type of the input so that it matches the element size
+  // and strides computed from that type.
+  const nvidia::gxf::Shape shape = nvidia::gxf::Shape{height, width, nChannels};
+  const auto wrapped =
+      gxf_tensor.value()->wrapMemory(shape,
+                                     data_type,
+                                     element_size,
+                                     nvidia::gxf::ComputeTrivialStrides(shape, element_size),
+                                     nvidia::gxf::MemoryStorageType::kDevice,
+                                     *pointer,
+                                     [orig_pointer = pointer](void*) mutable {
+                                       orig_pointer.reset();  // decrement ref count
+                                       return nvidia::gxf::Success;
+                                     });
+  if (!wrapped) { throw std::runtime_error("Failed to wrap cropped buffer in output tensor"); }
+
+  op_output.emit(out_message.value(), "output");
+}
+
+}  // namespace holoscan::ops
diff --git a/applications/stereo_vision/cpp/crop.h b/applications/stereo_vision/cpp/crop.h
@@ -0,0 +1,41 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OPERATORS_CROP
+#define OPERATORS_CROP
+
+#include <holoscan/holoscan.hpp>
+#include <holoscan/utils/cuda_stream_handler.hpp>
+
+
+namespace holoscan::ops {
+
+/**
+ * @brief Operator that crops a rectangular region out of an incoming tensor.
+ *
+ * The crop rectangle is configured through the integer parameters `x`, `y`,
+ * `width` and `height`, all of which default to 0.
+ */
+class CropOp : public Operator {
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(CropOp);
+
+  CropOp() = default;
+
+  /// Declares the "input" and "output" ports and the crop parameters.
+  void setup(OperatorSpec& spec) override;
+
+  /// Receives a tensor on "input", crops it and emits the result on "output".
+  void compute(InputContext& op_input, OutputContext& op_output,
+               ExecutionContext& context) override;
+
+ private:
+  Parameter<int> x_;       ///< Top left x coordinate of the crop rectangle.
+  Parameter<int> y_;       ///< Top left y coordinate of the crop rectangle.
+  Parameter<int> width_;   ///< Width of the crop rectangle.
+  Parameter<int> height_;  ///< Height of the crop rectangle.
+};
+
+}  // namespace holoscan::ops
+#endif