-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ebe3665
commit fd54127
Showing
4 changed files
with
175 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,116 @@ | ||
#ifdef __CLION_IDE__ | ||
#include <libgpu/opencl/cl/clion_defines.cl> | ||
|
||
#include <libgpu/opencl/cl/clion_defines.cl> | ||
|
||
#endif | ||
|
||
|
||
#line 6 | ||
|
||
// TILE_SIZE и WORK_PER_THREAD задаются через поле 'defines' в кернел конфиге | ||
|
||
__kernel void matrix_multiplication_naive() | ||
{ | ||
// TODO | ||
__kernel void matrix_multiplication_naive( | ||
__global float *as, | ||
__global float *bs, | ||
__global float *cs, | ||
const unsigned int M, | ||
const unsigned int K, | ||
const unsigned int N | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
if (i >= N || j >= M) | ||
return; | ||
|
||
float sum = 0.0f; | ||
for (unsigned int k = 0; k < K; k++) | ||
sum += as[j * K + k] * bs[k * N + i]; | ||
cs[j * N + i] = sum; | ||
} | ||
|
||
#ifdef TILE_SIZE | ||
__kernel void matrix_multiplication_local() | ||
{ | ||
// TODO | ||
__kernel void matrix_multiplication_local( | ||
__global float *as, | ||
__global float *bs, | ||
__global float *cs, | ||
const unsigned int M, | ||
const unsigned int K, | ||
const unsigned int N | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
__local float tile_a[TILE_SIZE][TILE_SIZE]; | ||
__local float tile_b[TILE_SIZE][TILE_SIZE]; | ||
|
||
unsigned int local_i = get_local_id(0); | ||
unsigned int local_j = get_local_id(1); | ||
|
||
float sum = 0.0f; | ||
for (int tile_start = 0; tile_start < K; tile_start += TILE_SIZE) { | ||
if (i < N && j < M && tile_start + local_i < K) | ||
tile_a[local_j][local_i] = as[(tile_start + local_i) + j * K]; | ||
else | ||
tile_a[local_j][local_i] = 0.0f; | ||
|
||
if (i < N && j < M && tile_start + local_j < K) | ||
tile_b[local_j][local_i] = bs[i + (tile_start + local_j) * K]; | ||
else | ||
tile_b[local_j][local_i] = 0.0f; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
for (int l = 0; l < TILE_SIZE; ++l) | ||
sum += tile_a[local_j][l] * tile_b[l][local_i]; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
} | ||
|
||
if (i < N && j < M) | ||
cs[j * N + i] = sum; | ||
} | ||
#endif | ||
|
||
#if defined(TILE_SIZE) && defined(WORK_PER_THREAD) | ||
__kernel void matrix_multiplication_local_wpt() | ||
{ | ||
// TODO | ||
__kernel void matrix_multiplication_local_wpt( | ||
__global float *as, | ||
__global float *bs, | ||
__global float *cs, | ||
const unsigned int M, | ||
const unsigned int K, | ||
const unsigned int N | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
__local float tile_a[TILE_SIZE][TILE_SIZE]; | ||
__local float tile_b[TILE_SIZE][TILE_SIZE]; | ||
|
||
unsigned int local_i = get_local_id(0); | ||
unsigned int local_j = get_local_id(1); | ||
|
||
float sum = 0.0f; | ||
for (int tile_start = 0; tile_start < K; tile_start += TILE_SIZE) { | ||
if (i < N && j < M && tile_start + local_i < K) | ||
tile_a[local_j][local_i] = as[(tile_start + local_i) + j * K]; | ||
else | ||
tile_a[local_j][local_i] = 0.0f; | ||
|
||
if (i < N && j < M && tile_start + local_j < K) | ||
tile_b[local_j][local_i] = bs[i + (tile_start + local_j) * N]; | ||
else | ||
tile_b[local_j][local_i] = 0.0f; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
for (int l = 0; l < TILE_SIZE; ++l) | ||
sum += tile_a[local_j][l] * tile_b[l][local_i]; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
} | ||
|
||
if (i < N && j < M) | ||
cs[j * N + i] = sum; | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,79 @@ | ||
#ifdef __CLION_IDE__ | ||
#include <libgpu/opencl/cl/clion_defines.cl> | ||
|
||
#include <libgpu/opencl/cl/clion_defines.cl> | ||
|
||
#endif | ||
|
||
|
||
#line 6 | ||
|
||
__kernel void matrix_transpose_naive() | ||
{ | ||
// TODO | ||
__kernel void matrix_transpose_naive( | ||
__global float *as, | ||
__global float *as_t, | ||
const unsigned int M, | ||
const unsigned int K | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
if (i >= M || j >= K) | ||
return; | ||
|
||
as_t[j * M + i] = as[i * K + j]; | ||
} | ||
|
||
__kernel void matrix_transpose_local_bad_banks() | ||
{ | ||
// TODO | ||
#define TILE_SIZE 16 | ||
__kernel void matrix_transpose_local_bad_banks( | ||
__global float *as, | ||
__global float *as_t, | ||
const unsigned int M, | ||
const unsigned int K | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
unsigned int local_i = get_local_id(0); | ||
unsigned int local_j = get_local_id(1); | ||
|
||
__local float tile[TILE_SIZE][TILE_SIZE]; | ||
|
||
unsigned int group_i = get_group_id(0); | ||
unsigned int group_j = get_group_id(1); | ||
|
||
unsigned int i_new = group_i * TILE_SIZE + local_j; | ||
unsigned int j_new = group_j * TILE_SIZE + local_i; | ||
|
||
tile[local_j][local_i] = as[j * M + i]; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
as_t[i_new * K + j_new] = tile[local_i][local_j]; | ||
} | ||
|
||
__kernel void matrix_transpose_local_good_banks() | ||
{ | ||
// TODO | ||
#define TILE_SIZE 16 | ||
__kernel void matrix_transpose_local_good_banks( | ||
__global float *as, | ||
__global float *as_t, | ||
const unsigned int M, | ||
const unsigned int K | ||
) { | ||
unsigned int i = get_global_id(0); | ||
unsigned int j = get_global_id(1); | ||
|
||
unsigned int local_i = get_local_id(0); | ||
unsigned int local_j = get_local_id(1); | ||
|
||
__local float tile[TILE_SIZE * (TILE_SIZE + 1)]; | ||
|
||
unsigned int group_i = get_group_id(0); | ||
unsigned int group_j = get_group_id(1); | ||
|
||
unsigned int i_new = group_i * TILE_SIZE + local_j; | ||
unsigned int j_new = group_j * TILE_SIZE + local_i; | ||
|
||
tile[local_j * (TILE_SIZE + 1) + local_i] = as[j * M + i]; | ||
|
||
barrier(CLK_LOCAL_MEM_FENCE); | ||
|
||
as_t[i_new * K + j_new] = tile[local_i * (TILE_SIZE + 1) + local_j]; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters