Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PathType::SCAN_16PATH #78

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/libsgm.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ enum ExecuteInOut
enum class PathType
{
SCAN_4PATH, //>! Horizontal and vertical paths.
SCAN_8PATH //>! Horizontal, vertical and oblique paths.
SCAN_8PATH, //>! Horizontal, vertical and oblique paths.
SCAN_16PATH //>! The 8 paths above plus eight additional oblique paths (16 aggregation paths in total).
};

/**
Expand Down
4 changes: 2 additions & 2 deletions sample/stereosgm_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int main(int argc, char* argv[])
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(num_paths == 4 || num_paths == 8 || num_paths == 16, "number of scanlines must be 4, 8 or 16.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");
ASSERT_MSG(dst_depth == 8 || dst_depth == 16, "output depth bits must be 8 or 16");
if (subpixel)
Expand All @@ -77,7 +77,7 @@ int main(int argc, char* argv[])
const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int src_bytes = src_depth * width * height / 8;
const int dst_bytes = dst_depth * width * height / 8;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : num_paths == 4 ? sgm::PathType::SCAN_4PATH : sgm::PathType::SCAN_16PATH;

const sgm::StereoSGM::Parameters param(10, 120, 0.95f, subpixel, path_type, 0, 1, census_type);
sgm::StereoSGM sgm(width, height, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_CUDA2CUDA, param);
Expand Down
8 changes: 5 additions & 3 deletions sample/stereosgm_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ int main(int argc, char* argv[])
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(num_paths == 4 || num_paths == 8 || num_paths == 16, "number of scanlines must be 4, 8 or 16.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");

const int src_depth = I1.type() == CV_8U ? 8 : 16;
const int dst_depth = 16;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : num_paths == 4 ? sgm::PathType::SCAN_4PATH : sgm::PathType::SCAN_16PATH;

const sgm::StereoSGM::Parameters param(P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
sgm::StereoSGM ssgm(I1.cols, I1.rows, disp_size, src_depth, dst_depth, sgm::EXECUTE_INOUT_HOST2HOST, param);
Expand Down Expand Up @@ -100,9 +100,11 @@ int main(int argc, char* argv[])
std::cout << "\tESC - quit the program" << std::endl;
std::cout << "\ts - switch display (disparity | colored disparity | input image)" << std::endl;

cv::namedWindow("image", cv::WINDOW_KEEPRATIO);

int mode = 0;
while (true) {

cv::setWindowTitle("image", titles[mode]);
cv::imshow("image", images[mode]);

Expand Down
4 changes: 2 additions & 2 deletions sample/stereosgm_image_cv_gpumat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ int main(int argc, char* argv[])
ASSERT_MSG(I1.size() == I2.size() && I1.type() == I2.type(), "input images must be same size and type.");
ASSERT_MSG(I1.type() == CV_8U || I1.type() == CV_16U, "input image format must be CV_8U or CV_16U.");
ASSERT_MSG(disp_size == 64 || disp_size == 128 || disp_size == 256, "disparity size must be 64, 128 or 256.");
ASSERT_MSG(num_paths == 4 || num_paths == 8, "number of scanlines must be 4 or 8.");
ASSERT_MSG(num_paths == 4 || num_paths == 8 || num_paths == 16, "number of scanlines must be 4, 8 or 16.");
ASSERT_MSG(census_type == sgm::CensusType::CENSUS_9x7 || census_type == sgm::CensusType::SYMMETRIC_CENSUS_9x7, "census type must be 0 or 1.");

const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : sgm::PathType::SCAN_4PATH;
const sgm::PathType path_type = num_paths == 8 ? sgm::PathType::SCAN_8PATH : num_paths == 4 ? sgm::PathType::SCAN_4PATH : sgm::PathType::SCAN_16PATH;
sgm::LibSGMWrapper sgm(disp_size, P1, P2, uniqueness, false, path_type, min_disp, LR_max_diff, census_type);
cv::Mat disparity;

Expand Down
209 changes: 204 additions & 5 deletions src/cost_aggregation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,9 @@ __global__ void aggregate_oblique_path_kernel(
static const unsigned int RIGHT_BUFFER_SIZE = MAX_DISPARITY + PATHS_PER_BLOCK;
static const unsigned int RIGHT_BUFFER_ROWS = RIGHT_BUFFER_SIZE / DP_BLOCK_SIZE;

static_assert(X_DIRECTION == 1 || X_DIRECTION == -1, "");
static_assert(Y_DIRECTION == 1 || Y_DIRECTION == -1, "");
static_assert(X_DIRECTION == 1 || X_DIRECTION == -1 || X_DIRECTION == 2 || X_DIRECTION == -2, "");
static_assert(Y_DIRECTION == 1 || Y_DIRECTION == -1 || Y_DIRECTION == 2 || Y_DIRECTION == -2, "");

if (width == 0 || height == 0) {
return;
}
Expand Down Expand Up @@ -585,6 +586,185 @@ void aggregate_downleft2upright(
CUDA_CHECK(cudaGetLastError());
}

/******* 16_PATH ********/

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (1, 2) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): (1, 2) is presumably (X_DIRECTION, Y_DIRECTION); the kernel's
 * template parameter order is not visible here - confirm.
 * NOTE(review): the grid size uses the same width + height formula as the other
 * oblique launchers in this section; confirm it is sufficient for 2-pixel steps.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upupleft2downdownright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 1, 2, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (-1, -2) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): the function name is missing the 't' of "right"; the spelling
 * is kept because the caller in cost_aggregation_ uses it as-is.
 * NOTE(review): (-1, -2) is presumably (X_DIRECTION, Y_DIRECTION); the kernel's
 * template parameter order is not visible here - confirm.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downdownrigh2upupleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -1, -2, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (-1, 2) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): (-1, 2) is presumably (X_DIRECTION, Y_DIRECTION); the kernel's
 * template parameter order is not visible here - confirm.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upupright2downdownleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -1, 2, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (1, -2) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): (1, -2) is presumably (X_DIRECTION, Y_DIRECTION); the kernel's
 * template parameter order is not visible here - confirm.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downdownleft2upupright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 1, -2, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (2, -1) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): assuming the arguments are (X_DIRECTION, Y_DIRECTION) with the
 * same sign convention as aggregate_upupleft2downdownright (which passes
 * (1, 2)), a "downrightright" scan would be expected to pass (2, 1) rather than
 * (2, -1). The name and the vertical sign look swapped with
 * aggregate_downleftleft2uprightright - confirm against the kernel.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_upleftleft2downrightright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 2, -1, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (-2, 1) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): assuming the arguments are (X_DIRECTION, Y_DIRECTION) with the
 * same sign convention as aggregate_downdownrigh2upupleft (which passes
 * (-1, -2)), an "upleftleft" scan would be expected to pass (-2, -1) rather
 * than (-2, 1). The name and the vertical sign look swapped with
 * aggregate_uprightright2downleftleft - confirm against the kernel.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downrightright2upleftleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -2, 1, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (-2, -1) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): assuming the arguments are (X_DIRECTION, Y_DIRECTION) with the
 * same sign convention as aggregate_upupright2downdownleft (which passes
 * (-1, 2)), a "downleftleft" scan would be expected to pass (-2, 1) rather
 * than (-2, -1). The name and the vertical sign look swapped with
 * aggregate_downrightright2upleftleft - confirm against the kernel.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_uprightright2downleftleft(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, -2, -1, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}

/**
 * Host-side launcher for one of the additional 16-path oblique directions.
 *
 * Instantiates aggregate_oblique_path_kernel with direction template
 * arguments (2, 1) and enqueues it on @p stream; the launch status is then
 * checked through CUDA_CHECK(cudaGetLastError()).
 *
 * NOTE(review): assuming the arguments are (X_DIRECTION, Y_DIRECTION) with the
 * same sign convention as aggregate_downdownleft2upupright (which passes
 * (1, -2)), an "uprightright" scan would be expected to pass (2, -1) rather
 * than (2, 1). The name and the vertical sign look swapped with
 * aggregate_upleftleft2downrightright - confirm against the kernel.
 */
template <typename CENSUS_TYPE, unsigned int MAX_DISPARITY>
void aggregate_downleftleft2uprightright(
    COST_TYPE *dest,
    const CENSUS_TYPE *left,
    const CENSUS_TYPE *right,
    int width,
    int height,
    unsigned int p1,
    unsigned int p2,
    int min_disp,
    cudaStream_t stream)
{
    // Threads cooperating on a single path, and paths handled by one block.
    static const unsigned int THREADS_PER_PATH = MAX_DISPARITY / DP_BLOCK_SIZE;
    static const unsigned int PATHS_IN_BLOCK = BLOCK_SIZE / THREADS_PER_PATH;

    // Ceiling division of (width + height - 1) paths by PATHS_IN_BLOCK.
    const int num_blocks = (width + height + PATHS_IN_BLOCK - 2) / PATHS_IN_BLOCK;
    const int threads_per_block = BLOCK_SIZE;

    aggregate_oblique_path_kernel<CENSUS_TYPE, 2, 1, MAX_DISPARITY>
        <<<num_blocks, threads_per_block, 0, stream>>>(
            dest, left, right, width, height, p1, p2, min_disp);
    CUDA_CHECK(cudaGetLastError());
}


} // namespace oblique

} // namespace cost_aggregation
Expand All @@ -598,14 +778,14 @@ void cost_aggregation_(const DeviceImage& srcL, const DeviceImage& srcR, DeviceI
{
const int width = srcL.cols;
const int height = srcL.rows;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : path_type == PathType::SCAN_8PATH ? 8 : 16;

dst.create(num_paths, height * width * MAX_DISPARITY, SGM_8U);

const CENSUS_TYPE* left = srcL.ptr<CENSUS_TYPE>();
const CENSUS_TYPE* right = srcR.ptr<CENSUS_TYPE>();

cudaStream_t streams[8];
cudaStream_t streams[16];
for (int i = 0; i < num_paths; i++)
cudaStreamCreate(&streams[i]);

Expand All @@ -618,7 +798,7 @@ void cost_aggregation_(const DeviceImage& srcL, const DeviceImage& srcR, DeviceI
cost_aggregation::horizontal::aggregate_right2left<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(3), left, right, width, height, P1, P2, min_disp, streams[3]);

if (path_type == PathType::SCAN_8PATH) {
if (path_type == PathType::SCAN_8PATH || path_type == PathType::SCAN_16PATH) {
cost_aggregation::oblique::aggregate_upleft2downright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(4), left, right, width, height, P1, P2, min_disp, streams[4]);
cost_aggregation::oblique::aggregate_upright2downleft<CENSUS_TYPE, MAX_DISPARITY>(
Expand All @@ -629,6 +809,25 @@ void cost_aggregation_(const DeviceImage& srcL, const DeviceImage& srcR, DeviceI
dst.ptr<COST_TYPE>(7), left, right, width, height, P1, P2, min_disp, streams[7]);
}

if (path_type == PathType::SCAN_16PATH) {
cost_aggregation::oblique::aggregate_upupleft2downdownright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(8), left, right, width, height, P1, P2, min_disp, streams[8]);
cost_aggregation::oblique::aggregate_upupright2downdownleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(9), left, right, width, height, P1, P2, min_disp, streams[9]);
cost_aggregation::oblique::aggregate_upleftleft2downrightright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(10), left, right, width, height, P1, P2, min_disp, streams[10]);
cost_aggregation::oblique::aggregate_uprightright2downleftleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(11), left, right, width, height, P1, P2, min_disp, streams[11]);
cost_aggregation::oblique::aggregate_downdownleft2upupright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(12), left, right, width, height, P1, P2, min_disp, streams[12]);
cost_aggregation::oblique::aggregate_downdownrigh2upupleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(13), left, right, width, height, P1, P2, min_disp, streams[13]);
cost_aggregation::oblique::aggregate_downleftleft2uprightright<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(14), left, right, width, height, P1, P2, min_disp, streams[14]);
cost_aggregation::oblique::aggregate_downrightright2upleftleft<CENSUS_TYPE, MAX_DISPARITY>(
dst.ptr<COST_TYPE>(15), left, right, width, height, P1, P2, min_disp, streams[15]);
}

for (int i = 0; i < num_paths; i++)
cudaStreamSynchronize(streams[i]);
for (int i = 0; i < num_paths; i++)
Expand Down
41 changes: 26 additions & 15 deletions src/winner_takes_all.cu
Original file line number Diff line number Diff line change
Expand Up @@ -232,21 +232,32 @@ void winner_takes_all_(const DeviceImage& src, DeviceImage& dstL, DeviceImage& d
output_type* dispL = dstL.ptr<output_type>();
output_type* dispR = dstR.ptr<output_type>();

if (subpixel && path_type == PathType::SCAN_8PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else if (subpixel && path_type == PathType::SCAN_4PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else if (!subpixel && path_type == PathType::SCAN_8PATH) {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
}
else /* if (!subpixel && path_type == PathType::SCAN_4PATH) */ {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
switch (path_type) {
case PathType::SCAN_4PATH: if (subpixel) {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} else {
winner_takes_all_kernel<MAX_DISPARITY, 4, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} break;

case PathType::SCAN_8PATH: if (subpixel) {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} else {
winner_takes_all_kernel<MAX_DISPARITY, 8, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} break;

case PathType::SCAN_16PATH: if (subpixel) {
winner_takes_all_kernel<MAX_DISPARITY, 16, compute_disparity_subpixel<MAX_DISPARITY>><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} else {
winner_takes_all_kernel<MAX_DISPARITY, 16, compute_disparity_normal><<<gdim, bdim>>>(
dispL, dispR, cost, width, height, pitch, uniqueness);
} break;

default: assert(("unimplemented", false));
}

CUDA_CHECK(cudaGetLastError());
Expand Down
4 changes: 2 additions & 2 deletions test/cost_aggregation_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void cost_aggregation(const HostImage& srcL, const HostImage& srcR, HostImage& d

const int w = srcL.cols;
const int h = srcL.rows;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : path_type == PathType::SCAN_8PATH ? 8 : 16;

dst.create(num_paths, h * w * disp_size, SGM_8U);

Expand Down Expand Up @@ -155,7 +155,7 @@ TEST_P(CostAggregationTest, AllPathsTest)
const int h = 240;
const int disp_size = param.disp_size;
const auto path_type = PathType::SCAN_8PATH;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : 8;
const int num_paths = path_type == PathType::SCAN_4PATH ? 4 : path_type == PathType::SCAN_8PATH ? 8 : 16;
const int P1 = param.P1;
const int P2 = param.P2;
const int min_disp = param.min_disp;
Expand Down
Loading