Skip to content

Commit

Permalink
[SYCL][E2E] Resolve reqd_sub_group_size issue for get_coordinate tests (
Browse files Browse the repository at this point in the history
#16429)

This PR resolves the reqd_sub_group_size issue and refactors the code in
get_coordinate_ops_imp.hpp
  • Loading branch information
YixingZhang007 authored Dec 20, 2024
1 parent 5c63de2 commit ae0b835
Showing 1 changed file with 23 additions and 29 deletions.
52 changes: 23 additions & 29 deletions sycl/test-e2e/Matrix/get_coordinate_ops_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,23 @@
template <typename T, size_t Rows, size_t Cols, layout Layout, use Use>
class matrix_process;

template <typename TResult, typename AccessorType>
void reduce_and_accumulate(sub_group sg, size_t sg_size, size_t global_idy,
AccessorType &global_acc, TResult *local_sums,
size_t count) {
for (size_t i = 0; i < count; i++) {
local_sums[i] = reduce_over_group(sg, local_sums[i], sycl::plus<>());

// Only the subgroup leader performs the global accumulation
if (global_idy % sg_size == 0) {
sycl::atomic_ref<TResult, sycl::memory_order::relaxed,
sycl::memory_scope::device>
aref(global_acc[i]);
aref.fetch_add(local_sums[i]);
}
}
}

template <typename T, typename TResult, size_t NUM_ROWS, size_t NUM_COLS,
size_t SROWS, size_t SCOLS, use Use, layout Layout, size_t VF>
void matrix_sum(big_matrix<T, NUM_ROWS / VF, NUM_COLS * VF> &M,
Expand All @@ -32,7 +49,7 @@ void matrix_sum(big_matrix<T, NUM_ROWS / VF, NUM_COLS * VF> &M,
{1, 1 * sg_size}),
[=](nd_item<2> spmd_item)
#ifdef SG_SZ
[[intel::reqd_sub_group_size(SG_SZ)]]
[[sycl::reqd_sub_group_size(SG_SZ)]]
#endif
{
// The submatrix API has to be accessed by all the workitems in a
Expand Down Expand Up @@ -83,29 +100,10 @@ void matrix_sum(big_matrix<T, NUM_ROWS / VF, NUM_COLS * VF> &M,
});
}

for (int i = 0; i < NUM_ROWS; i++) {
sum_local_rows[i] =
reduce_over_group(sg, sum_local_rows[i], sycl::plus<>());
// only Groups leader perform the global reduction
if (global_idy % sg_size == 0) {
sycl::atomic_ref<TResult, sycl::memory_order::relaxed,
sycl::memory_scope::device>
aref(v_rows[i]);
aref.fetch_add(sum_local_rows[i]);
}
}

for (int i = 0; i < NUM_COLS; i++) {
sum_local_cols[i] =
reduce_over_group(sg, sum_local_cols[i], sycl::plus<>());
// only Groups leader perform the global reduction
if (global_idy % sg_size == 0) {
sycl::atomic_ref<TResult, sycl::memory_order::relaxed,
sycl::memory_scope::device>
aref(v_cols[i]);
aref.fetch_add(sum_local_cols[i]);
}
}
reduce_and_accumulate(sg, sg_size, global_idy, v_rows,
sum_local_rows, NUM_ROWS);
reduce_and_accumulate(sg, sg_size, global_idy, v_cols,
sum_local_cols, NUM_COLS);
}); // parallel for
}).wait();
}
Expand All @@ -124,11 +122,7 @@ void test_get_coord_op() {
TResult sum_cols[Cols] = {0};
TResult sum_cols_ref[Cols] = {0};

for (int i = 0; i < Rows; i++) {
for (int j = 0; j < Cols; j++) {
M[i][j] = i + j;
}
}
matrix_fill(Rows, Cols, (T *)M, [](int i, int j) { return T(i + j); });

matrix_vnni<T>(Rows, Cols, *M, *Mvnni, VF);
big_matrix<T, Rows / VF, Cols * VF> MM((T *)&Mvnni);
Expand Down

0 comments on commit ae0b835

Please sign in to comment.