Skip to content

Commit

Permalink
ENH: Add support for 16 and 32-bit radiograph images
Browse files Browse the repository at this point in the history
Prior to these changes, the supported CUDA texture types (and by extension
radiograph images) was hard-coded to 8-bit (unsigned char) and managed as
a static global variable.

Since texture references needed to be known at compile time and the current
type was hard-coded to 8-bit, the use of them prevented from being able to
easily support 8, 16, and 32 bit radiograph images known at runtime.

To address this, this commit removes the use of `cudaBindTextureToArray`
(Deprecated since CUDA 11.3 and removed in CUDA 12.x)
and switch to using texture object instead by introducing a convenience helper
function `createTextureObjectFromArray`.

Approach implemented here is based of recommendation the post titled
"Kepler Texture Objects Improve Performance and Flexibility" published
in 2013.
See https://developer.nvidia.com/blog/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/

For reference, before this changes, the error message reported at build time
while attempting to use autoscoper built against CUDA 12.x was:

```
3 Errors under file path `...\libautoscoper\src\gpu\cuda\RadRenderer_kernels.cu:
- no instance of overloaded function "tex2D" matches the argument list
- identifier "cudaBindTextureToArray" is undefined
- texture is not a template
```
  • Loading branch information
NicerNewerCar committed Dec 20, 2023
1 parent 6444737 commit 11b91cc
Show file tree
Hide file tree
Showing 14 changed files with 76 additions and 95 deletions.
8 changes: 6 additions & 2 deletions libautoscoper/src/gpu/cuda/BackgroundRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <sstream>

#include <cuda.h>
#include <cutil_create_tex_obj.h>
#include <cutil_inline.h>
#include <cutil_math.h>

Expand Down Expand Up @@ -111,8 +112,9 @@ BackgroundRenderer::set_viewport(float x, float y, float width, float height)
void
BackgroundRenderer::render(float* buffer, size_t width, size_t height, float threshold) const
{
background_bind_array(array_);
background_render(buffer,
cudaTextureObject_t tex = createTexureObjectFromArray(array_, cudaReadModeElementType);

background_render(tex,buffer,
(int)width,
(int)height,
image_plane_[0],
Expand All @@ -124,6 +126,8 @@ BackgroundRenderer::render(float* buffer, size_t width, size_t height, float thr
viewport_[2],
viewport_[3],
threshold);

cudaDestroyTextureObject(tex);
}

} } // namespace xromm::cuda
26 changes: 5 additions & 21 deletions libautoscoper/src/gpu/cuda/BackgroundRenderer_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,10 @@

#include <cutil_inline.h>

/////// Global Variables ////////

static texture<float,2> tex;

//////// Image Rendering Kernel ////////

__global__
void background_render_kernel(float* output, int width, int height, float u0,
void background_render_kernel(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3, float threshold);

Expand All @@ -60,20 +56,8 @@ namespace xromm
namespace gpu
{

void background_bind_array(const cudaArray* array)
{
// Setup 2D texture.
tex.normalized = true;
tex.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;

// Bind array to 3D texture.
cutilSafeCall(cudaBindTextureToArray(tex, array));
}


void background_render(float* output, int width, int height, float u0,
void background_render(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3, float threshold)
{
Expand All @@ -82,7 +66,7 @@ void background_render(float* output, int width, int height, float u0,
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);

background_render_kernel << <gridDim, blockDim >> >(output, width, height,
background_render_kernel << <gridDim, blockDim >> >(tex,output, width, height,
u0, v0, u1, v1, u2, v2,
u3, v3, threshold);
}
Expand All @@ -92,7 +76,7 @@ void background_render(float* output, int width, int height, float u0,
} // namespace xromm

__global__
void background_render_kernel(float* output, int width, int height, float u0,
void background_render_kernel(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3, float threshold)
{
Expand All @@ -113,7 +97,7 @@ void background_render_kernel(float* output, int width, int height, float u0,
output[width*y+x] = 0.0f;
}
else {
output[width*y + x] = (threshold <= tex2D(tex, s, t)) ? 1.0f : 0.0f;
output[width*y + x] = (threshold <= tex2D<float>(tex, s, t)) ? 1.0f : 0.0f;
}
}

3 changes: 1 addition & 2 deletions libautoscoper/src/gpu/cuda/BackgroundRenderer_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ namespace xromm
namespace gpu
{

void background_bind_array(const cudaArray* array);

void background_render(float* output, int width, int height, float u0,
void background_render(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3, float threshold);

Expand Down
4 changes: 2 additions & 2 deletions libautoscoper/src/gpu/cuda/Compositor_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ void composite(float* src1,
{
// Calculate the block and grid sizes.
dim3 blockDim(32, 32);
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);
dim3 gridDim(((unsigned int)width+blockDim.x-1)/blockDim.x,
((unsigned int)height+blockDim.y-1)/blockDim.y);

// Call the kernel
composite_kernel<<<gridDim, blockDim>>>(src1,src2,src3,src4,dest,width,height);
Expand Down
4 changes: 2 additions & 2 deletions libautoscoper/src/gpu/cuda/DRRBackground_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ namespace xromm {
{
// Calculate the block and grid sizes.
dim3 blockDim(32, 32);
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);
dim3 gridDim(((unsigned int)width+blockDim.x-1)/blockDim.x,
((unsigned int)height+blockDim.y-1)/blockDim.y);

// Call the kernel
drr_background_kernel << <gridDim, blockDim >> >(src1, dest, width, height);
Expand Down
4 changes: 2 additions & 2 deletions libautoscoper/src/gpu/cuda/Merger_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ void merge(float* src1,
{
// Calculate the block and grid sizes.
dim3 blockDim(32, 32);
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);
dim3 gridDim(((unsigned int)width+blockDim.x-1)/blockDim.x,
((unsigned int)height+blockDim.y-1)/blockDim.y);

// Call the kernel
merge_kernel<<<gridDim, blockDim>>>(src1,src2,dest,width,height);
Expand Down
4 changes: 2 additions & 2 deletions libautoscoper/src/gpu/cuda/Mult_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ namespace xromm {
{
// Calculate the block and grid sizes.
dim3 blockDim(32, 32);
dim3 gridDim((width + blockDim.x - 1) / blockDim.x,
(height + blockDim.y - 1) / blockDim.y);
dim3 gridDim(((unsigned int)width + blockDim.x - 1) / blockDim.x,
((unsigned int)height + blockDim.y - 1) / blockDim.y);

// Call the kernel
mult_kernel<<<gridDim, blockDim>>>(src1, src2, dest, width, height);
Expand Down
8 changes: 6 additions & 2 deletions libautoscoper/src/gpu/cuda/RadRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <sstream>

#include <cuda.h>
#include <cutil_create_tex_obj.h>
#include <cutil_inline.h>
#include <cutil_math.h>

Expand Down Expand Up @@ -125,8 +126,9 @@ RadRenderer::set_viewport(float x, float y, float width, float height)
void
RadRenderer::render(float* buffer, size_t width, size_t height) const
{
video_bind_array(array_);
video_render(buffer,
cudaTextureObject_t tex = createTexureObjectFromArray(array_, cudaReadModeNormalizedFloat);

video_render(tex,buffer,
(int)width,
(int)height,
image_plane_[0],
Expand All @@ -137,6 +139,8 @@ RadRenderer::render(float* buffer, size_t width, size_t height) const
viewport_[1],
viewport_[2],
viewport_[3]);

cudaDestroyTextureObject(tex);
}

} } // namespace xromm::cuda
33 changes: 5 additions & 28 deletions libautoscoper/src/gpu/cuda/RadRenderer_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,11 @@

#include <cutil_inline.h>

/////// Global Variables ////////

static texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;

//////// Image Rendering Kernel ////////

__global__
void image_render_kernel(float* output, int width, int height, float u0,
void image_render_kernel(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3);

Expand All @@ -60,27 +57,7 @@ namespace xromm
namespace gpu
{

void video_bind_array(const cudaArray* array)
{
// Setup 2D texture.
tex.normalized = true;
tex.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;

// Bind array to 3D texture.
cutilSafeCall(cudaBindTextureToArray(tex, array));
}

/*
void image_deinit()
{
cutilSafeCall(cudaUnbindTexture(tex));
cutilSafeCall(cudaFreeArray(array));
}
*/

void video_render(float* output, int width, int height, float u0,
void video_render(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3)
{
Expand All @@ -89,7 +66,7 @@ void video_render(float* output, int width, int height, float u0,
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);

image_render_kernel<<<gridDim, blockDim>>>(output, width, height,
image_render_kernel<<<gridDim, blockDim>>>(tex,output, width, height,
u0, v0, u1, v1, u2, v2,
u3, v3);
}
Expand All @@ -99,7 +76,7 @@ void video_render(float* output, int width, int height, float u0,
} // namespace xromm

__global__
void image_render_kernel(float* output, int width, int height, float u0,
void image_render_kernel(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3)
{
Expand All @@ -120,7 +97,7 @@ void image_render_kernel(float* output, int width, int height, float u0,
output[width*y+x] = 0.0f;
}
else {
output[width*y+x] = 1.0f-tex2D(tex, s, t);
output[width*y+x] = 1.0f-tex2D<float>(tex, s, t);
}
}

6 changes: 1 addition & 5 deletions libautoscoper/src/gpu/cuda/RadRenderer_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,7 @@ namespace xromm
namespace gpu
{

void video_bind_array(const cudaArray* array);

//void cuda_image_deinit();

void video_render(float* output, int width, int height, float u0,
void video_render(cudaTextureObject_t tex,float* output, int width, int height, float u0,
float v0, float u1, float v1, float u2, float v2,
float u3, float v3);

Expand Down
13 changes: 10 additions & 3 deletions libautoscoper/src/gpu/cuda/RayCaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@
#include <iostream>
#include <sstream>

#include <cuda.h>
#include <cutil_create_tex_obj.h>
#include <cutil_inline.h>
#include <cutil_math.h>

#include "RayCaster.hpp"
#include "RayCaster_kernels.h"
#include "VolumeDescription.hpp"
Expand Down Expand Up @@ -142,16 +147,18 @@ RayCaster::render(float* buffer, size_t width, size_t height)
return;
}

//float aspectRatio = (float)width/(float)height;
volume_bind_array(volumeDescription_->image());
cudaTextureObject_t tex = createTexureObjectFromArray((cudaArray_t)volumeDescription_->image(), cudaReadModeNormalizedFloat);

volume_viewport(viewport_[0], viewport_[1], viewport_[2], viewport_[3]);
volume_render(buffer,
volume_render(tex,buffer,
width,
height,
invModelView_,
sampleDistance_,
rayIntensity_,
cutoff_);

cudaDestroyTextureObject(tex);
}

/*
Expand Down
28 changes: 7 additions & 21 deletions libautoscoper/src/gpu/cuda/RayCaster_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,11 @@ struct float3x4
// Forward declarations

__global__
void cuda_volume_render_kernel(float* output, size_t width, size_t height,
void cuda_volume_render_kernel(cudaTextureObject_t tex,float* output, size_t width, size_t height,
float step, float intensity, float cutoff);

// Global variables

static texture<unsigned short, 3, cudaReadModeNormalizedFloat> tex;

static __constant__ float4 d_viewport;
static __constant__ float3x4 d_invModelView;

Expand All @@ -74,25 +72,13 @@ namespace xromm
namespace gpu
{

void volume_bind_array(const cudaArray* array)
{
// Setup 3D texture.
tex.normalized = true;
tex.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;

// Bind array to 3D texture.
cutilSafeCall(cudaBindTextureToArray(tex, array));
}

void volume_viewport(float x, float y, float width, float height)
{
float4 viewport = make_float4(x, y, width, height);
cutilSafeCall(cudaMemcpyToSymbol(d_viewport, &viewport, sizeof(float4)));
}

void volume_render(float* buffer, size_t width, size_t height,
void volume_render(cudaTextureObject_t tex,float* buffer, size_t width, size_t height,
const float* invModelView, float step, float intensity,
float cutoff)
{
Expand All @@ -103,11 +89,11 @@ void volume_render(float* buffer, size_t width, size_t height,

// Calculate the block and grid sizes.
dim3 blockDim(32, 32);
dim3 gridDim((width+blockDim.x-1)/blockDim.x,
(height+blockDim.y-1)/blockDim.y);
dim3 gridDim(((unsigned int)width+blockDim.x-1)/blockDim.x,
((unsigned int)height+blockDim.y-1)/blockDim.y);

// Call the kernel
cuda_volume_render_kernel<<<gridDim, blockDim>>>(buffer, width, height,
cuda_volume_render_kernel<<<gridDim, blockDim>>>(tex,buffer, width, height,
step, intensity, cutoff);

//This crashes it under windows
Expand Down Expand Up @@ -168,7 +154,7 @@ float4 mul(const float3x4 &M, const float4 &v)

// Render the volume using ray marching.
__global__
void cuda_volume_render_kernel(float* buffer, size_t width, size_t height,
void cuda_volume_render_kernel(cudaTextureObject_t tex,float* buffer, size_t width, size_t height,
float step, float intensity, float cutoff)
{
uint x = blockIdx.x*blockDim.x+threadIdx.x;
Expand Down Expand Up @@ -208,7 +194,7 @@ void cuda_volume_render_kernel(float* buffer, size_t width, size_t height,
float density = 0.0f;
while (t > _near) {
float3 point = ray.origin+t*ray.direction;
float sample = tex3D(tex, point.x, 1.0f-point.y, -point.z);
float sample = tex3D<float>(tex, point.x, 1.0f-point.y, -point.z);
density += sample > cutoff? step*sample: 0.0f;
t -= 1.0f;
}
Expand Down
4 changes: 1 addition & 3 deletions libautoscoper/src/gpu/cuda/RayCaster_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,9 @@ namespace xromm
namespace gpu
{

void volume_bind_array(const cudaArray* array);

void volume_viewport(float x, float y, float width, float height);

void volume_render(float* buffer,
void volume_render(cudaTextureObject_t tex, float* buffer,
size_t width,
size_t height,
const float* invModelViewMat,
Expand Down
Loading

0 comments on commit 11b91cc

Please sign in to comment.