Update fill function for CUDA matrix #626

Merged 5 commits on Aug 21, 2024

.github/workflows/mac.yml (2 changes: 1 addition & 1 deletion)

@@ -67,7 +67,7 @@ jobs:
strategy:
matrix:
compiler:
- - { cpp: g++-11, c: gcc-11}
+ - { cpp: g++-14, c: gcc-14}
- { cpp: g++-12, c: gcc-12}
- { cpp: clang++, c: clang}
build_type: [Release]

include/micm/cuda/util/cuda_dense_matrix.hpp (11 changes: 9 additions & 2 deletions)

@@ -229,8 +229,15 @@ namespace micm
/// @param val Value to set each element to
void Fill(T val)
{
- std::fill(this->data_.begin(), this->data_.end(), val);
- CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice");
+ if constexpr (std::is_same_v<T, int>)
+ {
+   // cudaMemset sets each byte of the range to the given value and is asynchronous with respect to the host, so it is only used for the int specialization: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a
+   CHECK_CUDA_ERROR(cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset");
+ }
+ else
+ {
+   CHECK_CUDA_ERROR(micm::cuda::FillCudaMatrix<T>(this->param_, val), "FillCudaMatrix");
+ }
}

}; // class CudaDenseMatrix
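
A caveat on the cudaMemset branch above: cudaMemset writes a single byte value across the whole range, so an int fill only reproduces val exactly for byte-repeating bit patterns such as 0 or -1. A minimal standalone sketch (not part of this PR) demonstrating the behavior:

    #include <cuda_runtime.h>

    #include <cstdio>

    int main()
    {
      int* d_data = nullptr;
      cudaMalloc(&d_data, 4 * sizeof(int));

      // cudaMemset truncates the fill value to one byte: 1 becomes 0x01 in
      // every byte, so each int reads back as 0x01010101 = 16843009, not 1
      cudaMemset(d_data, 1, 4 * sizeof(int));

      int h_data[4] = {};
      cudaMemcpy(h_data, d_data, sizeof(h_data), cudaMemcpyDeviceToHost);
      std::printf("%d\n", h_data[0]);  // prints 16843009

      cudaFree(d_data);
      return 0;
    }

In practice Fill(0) is exact on this path; other int values depend on the byte-repetition caveat.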

include/micm/cuda/util/cuda_matrix.cuh (7 changes: 7 additions & 0 deletions)

@@ -40,5 +40,12 @@ namespace micm
/// @param vectorMatrixSrc Struct containing allocated source device memory to copy from
/// @returns Error code from copying to destination device memory from source device memory, if any
cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc);
+
+
+ /// @brief Fills a CUDA matrix with a specified value
+ /// @param param Struct containing allocated device memory
+ /// @param val Value to fill the matrix with
+ template<typename T>
+ cudaError_t FillCudaMatrix(CudaMatrixParam& param, T val);
} // namespace cuda
} // namespace micm
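
Because this header carries only the declaration, host translation units can call FillCudaMatrix and rely on the explicit instantiation added to src/util/cuda_matrix.cu later in this diff. A hedged caller-side sketch: the helper name zero_matrix is hypothetical, and the namespace of CudaMatrixParam is assumed here.

    #include <micm/cuda/util/cuda_matrix.cuh>

    // Hypothetical helper: zeroes the device buffer described by param.
    // This links only because FillCudaMatrix<double> is explicitly
    // instantiated in cuda_matrix.cu; an uninstantiated element type
    // would compile but fail at link time.
    cudaError_t zero_matrix(micm::CudaMatrixParam& param)  // namespace assumed
    {
      return micm::cuda::FillCudaMatrix<double>(param, 0.0);
    }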

include/micm/cuda/util/cuda_sparse_matrix.hpp (11 changes: 9 additions & 2 deletions)

@@ -129,8 +129,15 @@ namespace micm
/// @param val Value to set each element to
void Fill(T val)
{
- std::fill(this->data_.begin(), this->data_.end(), val);
- CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice");
+ if constexpr (std::is_same_v<T, int>)
+ {
+   // cudaMemset sets each byte of the range to the given value and is asynchronous with respect to the host, so it is only used for the int specialization: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a
+   CHECK_CUDA_ERROR(cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset");
+ }
+ else
+ {
+   CHECK_CUDA_ERROR(micm::cuda::FillCudaMatrix<T>(this->param_, val), "FillCudaMatrix");
+ }
}

CudaMatrixParam AsDeviceParam() const
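
Both matrix classes use the same if constexpr dispatch, which is resolved at compile time: the discarded branch is never instantiated, so an int matrix never references FillCudaMatrix<int>, which matters because cuda_matrix.cu (below) only instantiates the template for double. A minimal CPU-side illustration of the pattern (standalone sketch, not micm code):

    #include <cstddef>
    #include <cstring>
    #include <type_traits>

    template<typename T>
    void fill_buffer(T* data, std::size_t n, T val)
    {
      if constexpr (std::is_same_v<T, int>)
      {
        // byte-wise fast path, analogous to the cudaMemset branch;
        // exact only for byte-repeating values such as 0
        std::memset(data, val, sizeof(T) * n);
      }
      else
      {
        // element-wise path, analogous to FillCudaMatrixKernel
        for (std::size_t i = 0; i < n; ++i)
          data[i] = val;
      }
    }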

include/micm/util/sparse_matrix_standard_ordering.hpp (6 changes: 3 additions & 3 deletions)

@@ -25,7 +25,7 @@ namespace micm
const std::vector<std::size_t>& row_start)
{
return number_of_blocks * row_ids.size();
- };
+ }

static std::size_t VectorIndex(
std::size_t number_of_blocks,
@@ -43,7 +43,7 @@
if (elem == end)
throw std::system_error(make_error_code(MicmMatrixErrc::ZeroElementAccess));
return std::size_t{ (elem - row_ids.begin()) + block * row_ids.size() };
- };
+ }

static void AddToDiagonal(
const std::vector<std::size_t>& diagonal_ids,
@@ -55,6 +55,6 @@
for (std::size_t block_start = 0; block_start < number_of_blocks * block_size; block_start += block_size)
for (const auto& i : diagonal_ids)
data[block_start + i] += value;
- };
+ }
};
} // namespace micm

src/util/cuda_matrix.cu (24 changes: 23 additions & 1 deletion)

@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
#include <micm/cuda/util/cuda_matrix.cuh>
#include <micm/util/internal_error.hpp>
-
+ #include <micm/cuda/util/cuda_param.hpp>
#include <cuda_runtime.h>

#include <vector>
@@ -55,5 +55,27 @@ namespace micm
cudaMemcpyDeviceToDevice);
return err;
}
+
+ template<typename T>
+ __global__ void FillCudaMatrixKernel(T* d_data, std::size_t number_of_elements, T val)
+ {
+   std::size_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
+   if (tid < number_of_elements)
+   {
+     d_data[tid] = val;
+   }
+ }
+
+ template<typename T>
+ cudaError_t FillCudaMatrix(CudaMatrixParam& param, T val)
+ {
+   std::size_t number_of_blocks = (param.number_of_elements_ + BLOCK_SIZE - 1) / BLOCK_SIZE;
+   FillCudaMatrixKernel<<<number_of_blocks, BLOCK_SIZE>>>(param.d_data_, param.number_of_elements_, val);
+   cudaError_t err = cudaGetLastError();
+   return err;
+ }

+ // the template definition lives in this translation unit, so each supported element type needs an explicit instantiation here
+ template cudaError_t FillCudaMatrix<double>(CudaMatrixParam& param, double val);
} // namespace cuda
} // namespace micm
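
Two notes on the launcher above. The block count is an integer ceiling division: if BLOCK_SIZE were, say, 128, a matrix of 1000 elements would get (1000 + 127) / 128 = 8 blocks, i.e. 1024 threads, with the bounds check in the kernel masking the 24 surplus threads; cudaGetLastError then reports launch-configuration failures only, since the kernel itself runs asynchronously. Also, because only the double instantiation is emitted, each additional element type needs its own instantiation line. A hypothetical sketch (not part of this PR, and assuming CudaMatrixParam's d_data_ member is compatible with the requested type):

    // Hypothetical: extend the fill path to float matrices as well
    template cudaError_t FillCudaMatrix<float>(CudaMatrixParam& param, float val);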