Skip to content

Commit

Permalink
Fixed CUDA 7 compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
gineshidalgo99 committed Jul 10, 2017
1 parent 635dc58 commit c047d13
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions src/openpose/core/maximumBase.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ namespace op
// __global__ void fillTargetPtrChannel(T* targetPtrOffsetted, const T* sourcePtrOffsetted, const int width, const int imageOffset)
// {
// const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
// const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// // Ideal option: pass the thrust::device execution policy explicitly (does not work with CUDA < 8)
// // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// // Workaround for CUDA 7.5: omit the execution policy so Thrust dispatches on the device_ptr iterators
// const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
// const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
// targetPtrOffsetted[0] = sourceIndex % width;
// targetPtrOffsetted[1] = sourceIndex / width;
Expand All @@ -40,6 +43,10 @@ namespace op
// const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset;
// auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
// // Ideal option (not working for CUDA < 8)
// // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// // Workaround for CUDA 7.5: omit the execution policy so Thrust dispatches on the device_ptr iterators
// const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
// const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
// targetPtrOffsetted[0] = sourceIndex % width;
// targetPtrOffsetted[1] = sourceIndex / width;
Expand Down Expand Up @@ -81,7 +88,10 @@ namespace op
const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset;
// Option a - 6.3 fps
const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// Ideal option: pass the thrust::device execution policy explicitly (does not work with CUDA < 8)
// const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// Workaround for CUDA 7.5: omit the execution policy so Thrust dispatches on the device_ptr iterators
const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
fillTargetPtrPart<<<1, 1>>>(targetPtrOffsetted, sourcePtrOffsetted, sourceIndex, sourceIndex % width, sourceIndex / width);
// // Option b - <1 fps
Expand Down

0 comments on commit c047d13

Please sign in to comment.