Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stereo_vision app #661

Merged
merged 13 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions applications/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ add_holohub_application(qt_video_replayer DEPENDS OPERATORS qt_video npp_filter)

add_holohub_application(realsense_visualizer DEPENDS OPERATORS realsense_camera)

add_holohub_application(stereo_vision)

add_holohub_application(tao_peoplenet)

add_holohub_application(network_radar_pipeline DEPENDS
Expand Down
18 changes: 18 additions & 0 deletions applications/stereo_vision/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.20)
project(stereo_vision_app)
add_subdirectory(cpp)
58 changes: 58 additions & 0 deletions applications/stereo_vision/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Stereo Vision

<p align="center">
<img src="./images/plants.gif" alt="Holoscan Stereo Vision">
</p>

## Overview

A demo pipeline showcasing stereo disparity estimation.

## Description

This pipeline takes video from a stereo camera and estimates disparity using the ESS DNN. The disparity map is displayed through Holoviz.

## Requirements

This application requires a V4L2 stereo camera or recorded stereo video as input. A sample video acquired from a StereoLabs ZED
camera is downloaded by the `get_data_and_models.sh` script, which runs when the application is built.
A script for obtaining the calibration for StereoLabs cameras is also provided.
Holoscan SDK >=2.0,<=2.5 is required for TensorRT 8.6 compatibility.

### Camera Calibration

The default calibration will work for the sample video. If using a StereoLabs camera, the calibration
can be retrieved using `get_zed_calibration.py` and the device's serial number.

```sh
python3 get_zed_calibration.py -s [Serial Number]
```

### Input video

For the input video stream, use either a V4L2 stereo camera, such as those produced by StereoLabs, or the included recorded video.
The `stereo-plants.mp4` video is provided [here](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_stereo_video) and will be downloaded and converted to the necessary format when building the application.

The source device in `stereo_vision.yaml` should be modified to match the device the v4l2 video is
using. This can be found using `v4l2-ctl --list-devices`.


## Models

This demo requires the ESS DNN Stereo Disparity model, available from the NGC catalog, for disparity estimation. This model is downloaded when you build the application.

### ESS DNN

The ESS engine files generated in this demo application are specific to TensorRT 8.6; make sure
you build the devcontainer with a compatible `base_img` as shown in the <b>Build and Run Instructions</b> section.

## Build and Run Instructions

Run the following command to build and run the application using the recorded video:
```sh
./dev_container build_and_run stereo_vision --base_img nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu
```

To run the application using a v4l2 compatible stereo camera, run:
```sh
./dev_container build_and_run stereo_vision --base_img nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu --run_args "--source v4l2"
```
92 changes: 92 additions & 0 deletions applications/stereo_vision/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


cmake_minimum_required(VERSION 3.20)
project(stereo_depth CXX CUDA)

find_package(holoscan 2.4 REQUIRED CONFIG
PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")


include(FetchContent)
FetchContent_Declare(
Eigen3
URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
)
FetchContent_MakeAvailable(Eigen3)

add_executable(stereo_depth
main.cpp
undistort_rectify.cpp
split_video.cpp
heat_map.cpp
stereo_depth_kernels.cu
crop.cpp
ess_processor.cpp
)
target_link_libraries(stereo_depth
PRIVATE
holoscan::core
holoscan::ops::video_stream_replayer
holoscan::ops::holoviz
holoscan::ops::v4l2
holoscan::ops::format_converter
holoscan::ops::inference
holoscan::ops::inference_processor
CUDA::nppif
CUDA::nppidei
CUDA::nppicc
CUDA::nppial
Eigen3::Eigen
)

# Download the stereo vision sample video
if(HOLOHUB_DOWNLOAD_DATASETS)
include(holoscan_download_data)
holoscan_download_data(stereo_vision
URL nvidia/clara-holoscan/holoscan_stereo_video:20241216
DOWNLOAD_NAME holoscan_stereo_vision_20241216.zip
DOWNLOAD_DIR ${HOLOHUB_DATA_DIR}
GENERATE_GXF_ENTITIES
GXF_ENTITIES_HEIGHT 1080
GXF_ENTITIES_WIDTH 3840
GXF_ENTITIES_CHANNELS 3
GXF_ENTITIES_FRAMERATE 30
ALL
)
endif()

# Copy config file
add_custom_target(stereo_depth_yaml
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/stereo_vision.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "stereo_vision.yaml"
BYPRODUCTS "stereo_vision.yaml"
)

# Build the ESS engine file. This command should run after stereo_vision_data, which removes
# existing files.
#
# stereo_vision_data is only created when HOLOHUB_DOWNLOAD_DATASETS is ON. Naming a target that
# does not exist in DEPENDS makes CMake treat the name as a file-level dependency, which cannot
# be satisfied and breaks the build. The ordering dependency is therefore only added when the
# target is actually defined.
set(stereo_vision_engine_deps)
if(TARGET stereo_vision_data)
  list(APPEND stereo_vision_engine_deps stereo_vision_data)
endif()

add_custom_command(
  OUTPUT "${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
  COMMAND bash "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/get_data_and_models.sh" "${HOLOHUB_DATA_DIR}/stereo_vision"
  BYPRODUCTS "${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
  DEPENDS ${stereo_vision_engine_deps}
)

add_custom_target(get_data_and_models ALL
DEPENDS
"${HOLOHUB_DATA_DIR}/stereo_vision/ess.engine"
)

add_dependencies(stereo_depth stereo_depth_yaml get_data_and_models)
89 changes: 89 additions & 0 deletions applications/stereo_vision/cpp/crop.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "crop.h"

#include <cuda_runtime.h>

#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>

#include <gxf/std/tensor.hpp>

namespace holoscan::ops {

/**
 * @brief Declares the operator's ports and the parameters describing the crop rectangle.
 *
 * Registers one input port ("input") and one output port ("output"), plus the four integer
 * parameters "x", "y", "width" and "height". Every parameter defaults to 0; compute() rejects
 * a non-positive width or height, so "width" and "height" must be configured before running.
 *
 * @param spec Operator specification that receives the port and parameter declarations.
 */
void CropOp::setup(OperatorSpec& spec) {
  // Ports: a single entity in, a single entity out.
  spec.input<holoscan::gxf::Entity>("input");
  spec.output<holoscan::gxf::Entity>("output");

  // Shared default for all four crop-rectangle parameters.
  constexpr int kUnsetValue = 0;

  // Top-left corner of the crop rectangle.
  spec.param(x_, "x", "top left x", "top left x coordinate", kUnsetValue);
  spec.param(y_, "y", "top left y", "top left y coordinate", kUnsetValue);

  // Extent of the crop rectangle.
  spec.param(width_, "width", "width", "width", kUnsetValue);
  spec.param(height_, "height", "height", "height", kUnsetValue);
}

/**
 * @brief Copies the configured crop rectangle out of the single input tensor and emits it.
 *
 * The received tensor map must contain exactly one tensor, whose first three dimensions are
 * read as (height, width, channels). The region starting at (x, y) with size (width, height)
 * is copied into a newly allocated CUDA buffer, wrapped in a GXF tensor with packed strides
 * and emitted on the "output" port.
 *
 * NOTE(review): the copy uses cudaMemcpyDeviceToDevice and computes the source pitch as
 * width * channels * element size, so the input is assumed to be a densely packed device
 * tensor - confirm against the upstream operators.
 *
 * @param op_input  Input context; a holoscan::TensorMap is received from port "input".
 * @param op_output Output context; the cropped entity is emitted on port "output".
 * @param context   Execution context used to create the output GXF entity.
 * @throws std::runtime_error if no input is received, the input is not a single tensor with
 *         at least three dimensions, the crop rectangle is invalid or out of bounds, or a
 *         CUDA / GXF call fails.
 */
void CropOp::compute(InputContext& op_input, OutputContext& op_output, ExecutionContext& context) {
  auto maybe_tensormap = op_input.receive<holoscan::TensorMap>("input");
  if (!maybe_tensormap) { throw std::runtime_error("Failed to receive input tensor map"); }
  // Bind by reference: copying the map is unnecessary.
  const auto& tensormap = maybe_tensormap.value();

  if (tensormap.size() != 1) { throw std::runtime_error("Expecting single tensor input"); }

  const auto tensor = tensormap.begin()->second;
  const auto in_shape = tensor->shape();
  // Guard the indexed reads below; a lower-rank tensor would otherwise be read out of bounds.
  if (in_shape.size() < 3) {
    throw std::runtime_error("Expecting input tensor with height, width and channel dimensions");
  }
  const int orig_height = static_cast<int>(in_shape[0]);
  const int orig_width = static_cast<int>(in_shape[1]);
  const int nChannels = static_cast<int>(in_shape[2]);

  // A GXF view of the input is only used to query its element type.
  nvidia::gxf::Tensor tensor_gxf(tensor->dl_ctx());
  const nvidia::gxf::PrimitiveType data_type = tensor_gxf.element_type();
  const int element_size = static_cast<int>(nvidia::gxf::PrimitiveTypeSize(data_type));

  const int x = x_.get();
  const int y = y_.get();
  const int width = width_.get();
  const int height = height_.get();

  if (x < 0 || y < 0 || width <= 0 || height <= 0) {
    throw std::runtime_error("Invalid crop dimensions");
  }

  // Written as subtractions so that x + width and y + height cannot overflow.
  if (width > orig_width - x || height > orig_height - y) {
    throw std::runtime_error("Crop exceeds image boundaries");
  }

  // All byte counts are computed in size_t to avoid int overflow on large images.
  const std::size_t pixel_bytes =
      static_cast<std::size_t>(element_size) * static_cast<std::size_t>(nChannels);
  const std::size_t src_pitch = static_cast<std::size_t>(orig_width) * pixel_bytes;
  const std::size_t dst_pitch = static_cast<std::size_t>(width) * pixel_bytes;
  const std::size_t rows = static_cast<std::size_t>(height);

  void* raw_buffer = nullptr;
  const cudaError_t alloc_status = cudaMalloc(&raw_buffer, dst_pitch * rows);
  if (alloc_status != cudaSuccess) {
    throw std::runtime_error(std::string("cudaMalloc failed: ") +
                             cudaGetErrorString(alloc_status));
  }
  // Shared ownership: the buffer is freed once both this scope and the output tensor's
  // release callback have dropped their references (also on any exception below).
  std::shared_ptr<void> buffer(raw_buffer, [](void* ptr) {
    if (ptr != nullptr) { cudaFree(ptr); }
  });

  // First byte of the crop: skip y full source rows, then x pixels within that row.
  const char* src = static_cast<const char*>(tensor->data()) +
                    static_cast<std::size_t>(y) * src_pitch +
                    static_cast<std::size_t>(x) * pixel_bytes;
  const cudaError_t copy_status = cudaMemcpy2D(buffer.get(),
                                               dst_pitch,
                                               src,
                                               src_pitch,
                                               dst_pitch,
                                               rows,
                                               cudaMemcpyDeviceToDevice);
  if (copy_status != cudaSuccess) {
    throw std::runtime_error(std::string("cudaMemcpy2D failed: ") +
                             cudaGetErrorString(copy_status));
  }

  auto out_message = nvidia::gxf::Entity::New(context.context());
  if (!out_message) { throw std::runtime_error("Failed to create output entity"); }

  auto gxf_tensor = out_message.value().add<nvidia::gxf::Tensor>("");
  if (!gxf_tensor) { throw std::runtime_error("Failed to add tensor to output entity"); }

  const nvidia::gxf::Shape out_shape{height, width, nChannels};
  const auto wrap_result =
      gxf_tensor.value()->wrapMemory(out_shape,
                                     data_type,
                                     element_size,
                                     nvidia::gxf::ComputeTrivialStrides(out_shape, element_size),
                                     nvidia::gxf::MemoryStorageType::kDevice,
                                     buffer.get(),
                                     [owner = buffer](void*) mutable {
                                       owner.reset();  // decrement ref count
                                       return nvidia::gxf::Success;
                                     });
  if (!wrap_result) { throw std::runtime_error("Failed to wrap cropped buffer in output tensor"); }

  op_output.emit(out_message.value(), "output");
}

} // namespace holoscan::ops
41 changes: 41 additions & 0 deletions applications/stereo_vision/cpp/crop.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef OPERATORS_CROP
#define OPERATORS_CROP

#include <holoscan/holoscan.hpp>
#include <holoscan/utils/cuda_stream_handler.hpp>


namespace holoscan::ops {

/**
 * @brief Operator that crops a rectangular region out of a single input tensor.
 *
 * The rectangle is configured through four integer parameters registered in setup():
 * "x" / "y" (top-left corner) and "width" / "height" (extent). The cropped result is
 * emitted on the "output" port.
 */
class CropOp : public Operator {
 public:
  HOLOSCAN_OPERATOR_FORWARD_ARGS(CropOp);

  CropOp() = default;

  /// Declares the "input" / "output" ports and the crop-rectangle parameters.
  void setup(OperatorSpec& spec) override;

  /// Receives one tensor, crops it and emits the result on "output".
  void compute(InputContext& op_input, OutputContext& op_output,
               ExecutionContext& context) override;

 private:
  Parameter<int> x_;       ///< Top-left x coordinate of the crop rectangle.
  Parameter<int> y_;       ///< Top-left y coordinate of the crop rectangle.
  Parameter<int> width_;   ///< Width of the crop rectangle.
  Parameter<int> height_;  ///< Height of the crop rectangle.
};

} // namespace holoscan::ops
#endif
Loading