nvidia-holoscan · tbirdso · Jan 30, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 23, 2025
diff --git a/applications/CMakeLists.txt b/applications/CMakeLists.txt
@@ -82,6 +82,8 @@ add_holohub_application(qt_video_replayer DEPENDS OPERATORS qt_video npp_filter)
 
 add_holohub_application(realsense_visualizer DEPENDS OPERATORS realsense_camera)
 
+add_holohub_application(stereo_vision)
+
 add_holohub_application(tao_peoplenet)
 
 add_holohub_application(network_radar_pipeline DEPENDS

diff --git a/applications/stereo_vision/CMakeLists.txt b/applications/stereo_vision/CMakeLists.txt
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.20)
+project(stereo_vision_app)
+add_subdirectory(cpp)  # cpp/ is the only implementation directory added to the build
diff --git a/applications/stereo_vision/README.md b/applications/stereo_vision/README.md
@@ -0,0 +1,58 @@
+# Stereo Vision
+
+<p align="center">
+  <img src="./images/plants.gif" alt="Holoscan Stereo Vision">
+</p>
+
+## Overview
+
+A demo pipeline showcasing stereo disparity estimation.
+
+## Description
+
+This pipeline takes video from a stereo camera and estimates disparity using the ESS DNN. The disparity map is displayed through Holoviz.
+
+## Requirements
+
+This application requires a V4L2 stereo camera or recorded stereo video as input. A video acquired from a StereoLabs ZED
+camera is downloaded by the `get_data_and_models.sh` script, which runs when the application is built.
+A script for obtaining the calibration for StereoLabs cameras is also provided.
+Holoscan SDK >=2.0,<=2.5 is required for TensorRT 8.6 compatibility.
+### Camera Calibration
+
+The default calibration will work for the sample video. If using a StereoLabs camera, the calibration
+can be retrieved using `get_zed_calibration.py` and the device's serial number.
+
+```sh
+python3 get_zed_calibration.py -s [Serial Number]
+```
+
+### Input video
+
+For the input video stream, use either a V4L2 stereo camera, such as those produced by StereoLabs, or the included recorded video.
+The `stereo-plants.mp4` video is provided [here](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_stereo_video) and will be downloaded and converted to the necessary format when building the application.
+
+The source device in `stereo_vision.yaml` should be modified to match the V4L2 device that the
+camera is using. This can be found using `v4l2-ctl --list-devices`.
+
+
+## Models
+
+This demo requires the ESS DNN Stereo Disparity available from the NGC catalog for disparity estimation. This model is downloaded when you build the application.
+
+### ESS DNN
+
+The ESS engine files generated in this demo application are specific to TensorRT 8.6; make sure
+you build the devcontainer with a compatible `base_img` as shown in the <b>Build and Run Instructions</b> section.
+
+## Build and Run Instructions
+
+Run the following command to build and run the application using the recorded video:
+```sh
+./dev_container build_and_run stereo_vision --base_img nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu
+```
+
+To run the application using a V4L2-compatible stereo camera, run:
+```sh
+./dev_container build_and_run stereo_vision --base_img nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu --run_args "--source v4l2"
+```
diff --git a/applications/stereo_vision/cpp/CMakeLists.txt b/applications/stereo_vision/cpp/CMakeLists.txt
@@ -0,0 +1,92 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+cmake_minimum_required(VERSION 3.20)
+project(stereo_depth CXX CUDA)  # CUDA is enabled for stereo_depth_kernels.cu
+
+find_package(holoscan 2.4 REQUIRED CONFIG
+             PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")
+
+# Fetch the Eigen 3.4.0 release archive; it is linked below as Eigen3::Eigen.
+include(FetchContent)
+FetchContent_Declare(
+  Eigen3
+  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
+)
+FetchContent_MakeAvailable(Eigen3)
+
+add_executable(stereo_depth  # the application's operator sources are compiled into the binary
+  main.cpp
+  undistort_rectify.cpp
+  split_video.cpp
+  heat_map.cpp
+  stereo_depth_kernels.cu
+  crop.cpp
+  ess_processor.cpp
+)
+target_link_libraries(stereo_depth
+  PRIVATE
+  holoscan::core
+  holoscan::ops::video_stream_replayer
+  holoscan::ops::holoviz
+  holoscan::ops::v4l2
+  holoscan::ops::format_converter
+  holoscan::ops::inference
+  holoscan::ops::inference_processor
+  CUDA::nppif
+  CUDA::nppidei
+  CUDA::nppicc
+  CUDA::nppial
+  Eigen3::Eigen
+)
+
+# Download the stereo vision sample video and generate GXF entities from it (3840x1080, 3 channels, 30 fps)
+if(HOLOHUB_DOWNLOAD_DATASETS)
+    include(holoscan_download_data)
+    holoscan_download_data(stereo_vision
+      URL nvidia/clara-holoscan/holoscan_stereo_video:20241216
+      DOWNLOAD_NAME holoscan_stereo_vision_20241216.zip
+      DOWNLOAD_DIR ${HOLOHUB_DATA_DIR}
+      GENERATE_GXF_ENTITIES
+      GXF_ENTITIES_HEIGHT 1080
+      GXF_ENTITIES_WIDTH 3840
+      GXF_ENTITIES_CHANNELS 3
+      GXF_ENTITIES_FRAMERATE 30
+      ALL
+    )
+endif()
+
+# Copy stereo_vision.yaml from the source directory into the build directory
+add_custom_target(stereo_depth_yaml
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/stereo_vision.yaml" ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS "stereo_vision.yaml"
+    BYPRODUCTS "stereo_vision.yaml"
+)
+
+# Run get_data_and_models.sh to produce ess.engine; it must run after stereo_vision_data, which removes existing files
+add_custom_command(
+    OUTPUT "${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
+    COMMAND bash "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/get_data_and_models.sh" "${HOLOHUB_DATA_DIR}/stereo_vision"
+    BYPRODUCTS "${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
+    DEPENDS stereo_vision_data  # NOTE(review): presumably defined only when HOLOHUB_DOWNLOAD_DATASETS is ON - confirm the OFF build
+)
+
+add_custom_target(get_data_and_models ALL
+                  DEPENDS
+                  "${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
+)
+
+add_dependencies(stereo_depth stereo_depth_yaml get_data_and_models)
diff --git a/applications/stereo_vision/cpp/crop.cpp b/applications/stereo_vision/cpp/crop.cpp
@@ -0,0 +1,89 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crop.h"
+#include <gxf/std/tensor.hpp>
+
+namespace holoscan::ops {
+
+void CropOp::setup(OperatorSpec& spec) {  // declares the ports and the crop rectangle parameters
+  spec.input<holoscan::gxf::Entity>("input");    // compute() expects exactly one tensor here
+  spec.output<holoscan::gxf::Entity>("output");  // carries the cropped tensor
+  spec.param(x_, "x", "top left x", "top left x coordinate", 0);
+  spec.param(y_, "y", "top left y", "top left y coordinate", 0);
+  spec.param(width_, "width", "width", "width", 0);  // the default of 0 is rejected by compute()
+  spec.param(height_, "height", "height", "height", 0);  // the default of 0 is rejected by compute()
+}
+
+void CropOp::compute(InputContext& op_input, OutputContext& op_output, ExecutionContext& context) {
+  // Copies the configured rectangle out of the single HWC device tensor received on "input" and
+  // emits it on "output"; throws std::runtime_error on bad input, bad crop or CUDA failure.
+  auto maybe_tensormap = op_input.receive<holoscan::TensorMap>("input");
+  if (!maybe_tensormap) { throw std::runtime_error("Failed to receive input message"); }
+  const auto tensormap = maybe_tensormap.value();
+  if (tensormap.size() != 1) { throw std::runtime_error("Expecting single tensor input"); }
+  auto tensor = tensormap.begin()->second;
+  if (tensor->ndim() != 3) { throw std::runtime_error("Expecting a rank 3 (HWC) tensor"); }
+  const int orig_height = tensor->shape()[0];
+  const int orig_width = tensor->shape()[1];
+  const int nChannels = tensor->shape()[2];
+  nvidia::gxf::Tensor tensor_gxf(tensor->dl_ctx());
+  nvidia::gxf::PrimitiveType data_type = tensor_gxf.element_type();
+  const int element_size = nvidia::gxf::PrimitiveTypeSize(data_type);
+
+  const int x = x_.get(), y = y_.get(), width = width_.get(), height = height_.get();
+  if (x < 0 || y < 0 || width <= 0 || height <= 0) {
+    throw std::runtime_error("Invalid crop dimensions");
+  }
+  // Compare against the remaining extent so that x + width cannot overflow int.
+  if (width > orig_width - x || height > orig_height - y) {
+    throw std::runtime_error("Crop exceeds image boundaries");
+  }
+
+  // Byte sizes use size_t to avoid int overflow; the source rows are assumed densely packed.
+  const size_t pixel_bytes = static_cast<size_t>(element_size) * nChannels;
+  const size_t src_pitch = orig_width * pixel_bytes;
+  const size_t dst_pitch = width * pixel_bytes;
+  // Start from nullptr so the deleter never frees an indeterminate address if cudaMalloc fails.
+  auto pointer = std::shared_ptr<void*>(new void*(nullptr), [](void** slot) {
+    if (*slot != nullptr) { cudaFree(*slot); }
+    delete slot;
+  });
+  if (cudaMalloc(pointer.get(), dst_pitch * height) != cudaSuccess) {
+    throw std::runtime_error("Failed to allocate device memory for crop");
+  }
+  // The source must skip y rows as well as x pixels to start at the crop's top-left corner.
+  const auto* src = static_cast<const char*>(tensor->data()) + y * src_pitch + x * pixel_bytes;
+  if (cudaMemcpy2D(*pointer, dst_pitch, src, src_pitch, dst_pitch, height,
+                   cudaMemcpyDeviceToDevice) != cudaSuccess) {
+    throw std::runtime_error("Failed to copy cropped region");
+  }
+
+  nvidia::gxf::Shape shape = nvidia::gxf::Shape{height, width, nChannels};
+  auto out_message = nvidia::gxf::Entity::New(context.context());
+  auto gxf_tensor = out_message.value().add<nvidia::gxf::Tensor>("");
+  gxf_tensor.value()->wrapMemory(
+      shape, data_type, element_size, nvidia::gxf::ComputeTrivialStrides(shape, element_size),
+      nvidia::gxf::MemoryStorageType::kDevice, *pointer,
+      [orig_pointer = pointer](void*) mutable {
+        orig_pointer.reset();  // drop this reference; the buffer is freed with the last one
+        return nvidia::gxf::Success;
+      });
+  op_output.emit(out_message.value(), "output");
+}
+
+}  // namespace holoscan::ops
diff --git a/applications/stereo_vision/cpp/crop.h b/applications/stereo_vision/cpp/crop.h
@@ -0,0 +1,41 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OPERATORS_CROP
+#define OPERATORS_CROP
+
+#include <holoscan/holoscan.hpp>
+#include <holoscan/utils/cuda_stream_handler.hpp>
+
+
+namespace holoscan::ops {
+
+class CropOp : public Operator {  // crops a rectangle out of the tensor received on "input"
+ public:
+  HOLOSCAN_OPERATOR_FORWARD_ARGS(CropOp);
+  CropOp() = default;
+  void setup(OperatorSpec& spec) override;  // declares the ports and the parameters below
+  void compute(InputContext& in, OutputContext& op_output, ExecutionContext& ctx) override;
+ private:
+  Parameter<int> x_;       // "x": top left x coordinate of the crop
+  Parameter<int> y_;       // "y": top left y coordinate of the crop
+  Parameter<int> width_;   // "width": crop width, checked against the input tensor's width
+  Parameter<int> height_;  // "height": crop height, checked against the input tensor's height
+};
+
+}  // namespace holoscan::ops
+#endif