Skip to content

Commit

Permalink
Add radix.cl, main_radix.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
Нина Чекалина committed Jan 12, 2025
1 parent 070cf05 commit 436ad70
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 9 deletions.
97 changes: 96 additions & 1 deletion src/cl/radix.cl
Original file line number Diff line number Diff line change
@@ -1 +1,96 @@
// TODO

#define WORK_GROUP_SIZE 4
#define TILE_SIZE 16

__kernel void fill_with_zeros(__global unsigned int *as, unsigned int n)
{
const unsigned int gid = get_global_id(0);
if (gid >= n) {
return;
}
as[gid] = 0;
}

__kernel void count(__global unsigned int *as, __global unsigned int *counters, unsigned int n, unsigned int shift, unsigned int bits_count) {
unsigned int gid = get_global_id(0);
if (gid >= n) {
return;
}
unsigned int value = (as[gid] >> shift) & ((1 << bits_count) - 1);
unsigned int wgid = get_group_id(0);
atomic_inc(&counters[wgid * (1 << bits_count) + value]);
}

__kernel void matrix_transpose_local_good_banks(
__global float *a,
__global float *at,
unsigned int m,
unsigned int k
) {
int i = get_global_id(0);
int j = get_global_id(1);

__local float tile[TILE_SIZE][TILE_SIZE + 1];
int local_i = get_local_id(0);
int local_j = get_local_id(1);

if (i < k && j < m) {
tile[local_j][local_i] = a[j * k + i];
} else {
tile[local_j][local_i] = 0;
}

barrier(CLK_LOCAL_MEM_FENCE);

int target_j = (i - local_i) + local_j;
int target_i = (j - local_j) + local_i;
if (target_i < k && target_j < m) {
at[target_j * k + target_i] = tile[local_i][local_j];
}
}

__kernel void prefix_sum(__global unsigned int *as, __global unsigned int *bs, unsigned int i, unsigned int n)
{
unsigned int gid = get_global_id(0);
if (gid >= n) {
return;
}
if (gid >= i) {
bs[gid] = as[gid - i] + as[gid];
} else {
bs[gid] = as[gid];
}
}

__kernel void radix_sort(__global unsigned int *as, __global unsigned int *bs, __global unsigned int *counters, unsigned int n, unsigned int shift, unsigned int bits_count)
{
unsigned int gid = get_global_id(0);
if (gid >= n) {
return;
}

unsigned int value = (as[gid] >> shift) & ((1 << bits_count) - 1);

unsigned int wgid = get_group_id(0);

unsigned int start = wgid * WORK_GROUP_SIZE;
unsigned int end = gid;
unsigned int offset = 0;

for (unsigned int i = start; i < end; ++i) {
unsigned int prev_value = (as[i] >> shift) & ((1 << bits_count) - 1);
if (prev_value == value) {
++offset;
}
}

unsigned int base_idx;
unsigned int counters_idx = wgid + value * WORK_GROUP_SIZE;
if (counters_idx > 0) {
base_idx = counters[counters_idx - 1];
} else {
base_idx = 0;
}

bs[base_idx + offset] = as[gid];
}
66 changes: 58 additions & 8 deletions src/main_radix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
#include <stdexcept>
#include <vector>

#define TILE_SIZE 16

const int benchmarkingIters = 10;
const int benchmarkingItersCPU = 1;
const unsigned int n = 32 * 1024 * 1024;
const unsigned int n = 16;

const int bits_count = 2;

template<typename T>
void raiseFail(const T &a, const T &b, std::string message, std::string filename, int line) {
Expand Down Expand Up @@ -57,26 +61,72 @@ int main(int argc, char **argv) {
std::cout << "Data generated for n=" << n << "!" << std::endl;

const std::vector<unsigned int> cpu_reference = computeCPU(as);

// remove me
return 0;


unsigned int workGroupSize = 4;
unsigned int workGroupsCount = (n + workGroupSize - 1) / workGroupSize;
unsigned int globalWorkSize = workGroupsCount * workGroupSize;
unsigned int countersSize = workGroupsCount * (1 << bits_count);
unsigned int countersWorkSize = (countersSize + workGroupSize - 1) / workGroupSize * workGroupSize;

gpu::gpu_mem_32u as_gpu;
gpu::gpu_mem_32u bs_gpu;
gpu::gpu_mem_32u counters_gpu;
gpu::gpu_mem_32u counters_gpu_tmp;

as_gpu.resizeN(n);
bs_gpu.resizeN(n);
counters_gpu.resizeN(countersSize);
counters_gpu_tmp.resizeN(countersSize);

{
ocl::Kernel fill_with_zeros(radix_kernel, radix_kernel_length, "fill_with_zeros");
fill_with_zeros.compile();
ocl::Kernel count(radix_kernel, radix_kernel_length, "count");
count.compile();
ocl::Kernel transpose(radix_kernel, radix_kernel_length, "matrix_transpose_local_good_banks");
transpose.compile();
ocl::Kernel prefix_sum(radix_kernel, radix_kernel_length, "prefix_sum");
prefix_sum.compile();
ocl::Kernel radix_sort(radix_kernel, radix_kernel_length, "radix_sort");
radix_sort.compile();

std::vector<unsigned int> counters(countersSize, 0);

timer t;
for (int iter = 0; iter < benchmarkingIters; ++iter) {
// Запускаем секундомер после прогрузки данных, чтобы замерять время работы кернела, а не трансфер данных
as_gpu.writeN(as.data(), n);
t.restart();

// TODO
for (unsigned int shift = 0; shift < 32; shift += bits_count) {
fill_with_zeros.exec(gpu::WorkSize(workGroupSize, countersWorkSize), counters_gpu, countersSize);

count.exec(gpu::WorkSize(workGroupSize, globalWorkSize), as_gpu, counters_gpu, n, shift, bits_count);

unsigned int x_size = ((1 << bits_count) + TILE_SIZE - 1) / TILE_SIZE * TILE_SIZE;
unsigned int y_size = (workGroupsCount + TILE_SIZE - 1) / TILE_SIZE * TILE_SIZE;
transpose.exec(gpu::WorkSize(TILE_SIZE, TILE_SIZE, x_size, y_size), counters_gpu, counters_gpu_tmp, 1 << bits_count, workGroupsCount);
std::swap(counters_gpu, counters_gpu_tmp);

for (unsigned int i = 1; i < countersSize; i *= 2) {
prefix_sum.exec(gpu::WorkSize(workGroupSize, countersWorkSize), counters_gpu, counters_gpu_tmp, i, countersSize);
std::swap(counters_gpu, counters_gpu_tmp);
}

radix_sort.exec(gpu::WorkSize(workGroupSize, globalWorkSize), as_gpu, bs_gpu, counters_gpu, n, shift, bits_count);
std::swap(as_gpu, bs_gpu);
}
t.nextLap();
}
t.stop();

std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;
as_gpu.readN(as.data(), n);
}

// Проверяем корректность результатов
for (int i = 0; i < n; ++i) {
EXPECT_THE_SAME(as[i], cpu_reference[i], "GPU results should be equal to CPU results!");
}
return 0;
}
}

0 comments on commit 436ad70

Please sign in to comment.