diff --git a/include/micm/cuda/util/cuda_dense_matrix.hpp b/include/micm/cuda/util/cuda_dense_matrix.hpp index 65bad87db..192807ff0 100644 --- a/include/micm/cuda/util/cuda_dense_matrix.hpp +++ b/include/micm/cuda/util/cuda_dense_matrix.hpp @@ -69,48 +69,35 @@ namespace micm public: - CudaDenseMatrix() requires(std::is_same_v) + CudaDenseMatrix() : VectorMatrix() { this->param_.number_of_grid_cells_ = 0; this->param_.number_of_elements_ = this->data_.size(); - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); - } - CudaDenseMatrix() - : VectorMatrix() - { + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); } - CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim) requires(std::is_same_v) + CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim) : VectorMatrix(x_dim, y_dim) { this->param_.number_of_elements_ = this->data_.size(); this->param_.number_of_grid_cells_ = x_dim; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); - } - CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim) - : VectorMatrix(x_dim, y_dim) - { + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); } - CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value) requires(std::is_same_v) + CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value) : VectorMatrix(x_dim, y_dim, initial_value) { this->param_.number_of_elements_ = this->data_.size(); this->param_.number_of_grid_cells_ = x_dim; if (this->param_.number_of_elements_ != 0) { - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); Fill(initial_value); } } - CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value) - : VectorMatrix(x_dim, y_dim, initial_value) - { - } - - CudaDenseMatrix(const std::vector> other) requires(std::is_same_v) + CudaDenseMatrix(const std::vector> other) : VectorMatrix(other) { this->param_.number_of_grid_cells_ = 0; @@ -119,28 +106,18 @@ namespace micm { this->param_.number_of_elements_ += inner_vector.size(); } - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); } - CudaDenseMatrix(const std::vector> other) - : VectorMatrix(other) - { - } - - CudaDenseMatrix(const CudaDenseMatrix& other) requires(std::is_same_v) + CudaDenseMatrix(const CudaDenseMatrix& other) : VectorMatrix(other) { this->param_ = other.param_; this->param_.d_data_ = nullptr; this->param_.number_of_elements_ = other.param_.number_of_elements_; this->param_.number_of_grid_cells_ = other.param_.number_of_grid_cells_; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); - CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); - } - - CudaDenseMatrix(const CudaDenseMatrix& other) - : VectorMatrix(other) - { + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); } CudaDenseMatrix(CudaDenseMatrix&& other) noexcept @@ -156,8 +133,8 @@ namespace micm if (this->param_.d_data_ != nullptr) CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree"); this->param_ = other.param_; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); - CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); return *this; } @@ -183,14 +160,12 @@ namespace micm void CopyToDevice() { - static_assert(std::is_same_v); - CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice"); } void CopyToHost() { - static_assert(std::is_same_v); - CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost"); + CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost"); } CudaMatrixParam AsDeviceParam() const @@ -227,7 +202,7 @@ namespace micm { throw std::runtime_error("Both CUDA dense matrices must have the same size."); } - CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); } /// @brief Set every matrix element to a given value on the GPU @@ -239,7 +214,7 @@ namespace micm // the cudaMemset function only works for integer types and is an asynchronous function: // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a CHECK_CUDA_ERROR( - cudaMemset(this->param_.d_data_, val, sizeof(double) * this->param_.number_of_elements_), "cudaMemset"); + cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset"); } else { diff --git a/include/micm/cuda/util/cuda_matrix.cuh b/include/micm/cuda/util/cuda_matrix.cuh index f68efef64..61bfff6e5 100644 --- a/include/micm/cuda/util/cuda_matrix.cuh +++ b/include/micm/cuda/util/cuda_matrix.cuh @@ -16,6 +16,7 @@ namespace micm /// @param vectorMatrix Reference to struct containing information about allocated memory /// @param num_elements Requested number of elements to allocate /// @returns Error code from allocating data on the device, if any + template cudaError_t MallocVector(CudaMatrixParam& vectorMatrix, std::size_t num_elements); /// @brief Free memory allocated on device @@ -27,18 +28,21 @@ namespace micm /// @param vectorMatrix Struct containing allocated device memory /// @param h_data Host data to copy from /// @returns Error code from copying to device from the host, if any - cudaError_t CopyToDevice(CudaMatrixParam& vectorMatrix, std::vector& h_data); + template + cudaError_t CopyToDevice(CudaMatrixParam& vectorMatrix, std::vector& h_data); /// @brief Copies data from the device to the host /// @param vectorMatrix Struct containing allocated device memory /// @param h_data Host data to copy data to /// @returns Error code from copying from the device to the host, if any - cudaError_t CopyToHost(CudaMatrixParam& vectorMatrix, std::vector& h_data); + template + cudaError_t CopyToHost(CudaMatrixParam& vectorMatrix, std::vector& h_data); /// @brief Copies data to the destination device memory block from the source device memory block /// @param vectorMatrixDest Struct containing allocated destination device memory to copy to /// @param vectorMatrixSrc Struct containing allocated source device memory to copy from /// @returns Error code from copying to destination device memory from source device memory, if any + template cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc); /// @brief Fills a CUDA matrix with a specified value diff --git a/include/micm/cuda/util/cuda_sparse_matrix.hpp b/include/micm/cuda/util/cuda_sparse_matrix.hpp index 25a92d4b9..25eb07cfd 100644 --- a/include/micm/cuda/util/cuda_sparse_matrix.hpp +++ b/include/micm/cuda/util/cuda_sparse_matrix.hpp @@ -32,47 +32,28 @@ namespace micm this->param_.d_data_ = nullptr; } - CudaSparseMatrix(const SparseMatrixBuilder& builder) requires(std::is_same_v) - : SparseMatrix(builder) - { - this->param_.number_of_grid_cells_ = this->number_of_blocks_; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); - } CudaSparseMatrix(const SparseMatrixBuilder& builder) : SparseMatrix(builder) { - this->param_.d_data_ = nullptr; - } - - CudaSparseMatrix& operator=(const SparseMatrixBuilder& builder) requires( - std::is_same_v) - { - SparseMatrix::operator=(builder); this->param_.number_of_grid_cells_ = this->number_of_blocks_; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); - return *this; + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); } CudaSparseMatrix& operator=(const SparseMatrixBuilder& builder) { SparseMatrix::operator=(builder); - this->param_.d_data_ = nullptr; + this->param_.number_of_grid_cells_ = this->number_of_blocks_; + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); return *this; } - CudaSparseMatrix(const CudaSparseMatrix& other) requires(std::is_same_v) - : SparseMatrix(other) - { - this->param_ = other.param_; - this->param_.d_data_ = nullptr; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); - CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); - } - CudaSparseMatrix(const CudaSparseMatrix& other) : SparseMatrix(other) { + this->param_ = other.param_; this->param_.d_data_ = nullptr; + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); } CudaSparseMatrix(CudaSparseMatrix&& other) noexcept @@ -87,8 +68,8 @@ namespace micm SparseMatrix::operator=(other); this->param_ = other.param_; this->param_.d_data_ = nullptr; - CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); - CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); + CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice"); return *this; } @@ -102,27 +83,20 @@ namespace micm return *this; } - ~CudaSparseMatrix() requires(std::is_same_v) - { - CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree"); - this->param_.d_data_ = nullptr; - } - ~CudaSparseMatrix() { + CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree"); this->param_.d_data_ = nullptr; } void CopyToDevice() { - static_assert(std::is_same_v); - CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice"); + CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice"); } void CopyToHost() { - static_assert(std::is_same_v); - CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost"); + CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost"); } /// @brief Set every matrix element to a given value on the GPU @@ -134,7 +108,7 @@ namespace micm // the cudaMemset function only works for integer types and is an asynchronous function: // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a CHECK_CUDA_ERROR( - cudaMemset(this->param_.d_data_, val, sizeof(double) * this->param_.number_of_elements_), "cudaMemset"); + cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset"); } else { diff --git a/src/util/cuda_matrix.cu b/src/util/cuda_matrix.cu index cc0e373a0..0f79bd133 100644 --- a/src/util/cuda_matrix.cu +++ b/src/util/cuda_matrix.cu @@ -12,10 +12,11 @@ namespace micm { namespace cuda { + template cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements) { param.number_of_elements_ = number_of_elements; - cudaError_t err = cudaMalloc(&(param.d_data_), sizeof(double) * number_of_elements); + cudaError_t err = cudaMalloc(&(param.d_data_), sizeof(T) * number_of_elements); return err; } @@ -32,27 +33,30 @@ namespace micm return err; } - cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector& h_data) + template + cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector& h_data) { cudaError_t err = - cudaMemcpy(param.d_data_, h_data.data(), sizeof(double) * param.number_of_elements_, cudaMemcpyHostToDevice); + cudaMemcpy(param.d_data_, h_data.data(), sizeof(T) * param.number_of_elements_, cudaMemcpyHostToDevice); return err; } - cudaError_t CopyToHost(CudaMatrixParam& param, std::vector& h_data) + template + cudaError_t CopyToHost(CudaMatrixParam& param, std::vector& h_data) { cudaDeviceSynchronize(); cudaError_t err = - cudaMemcpy(h_data.data(), param.d_data_, sizeof(double) * param.number_of_elements_, cudaMemcpyDeviceToHost); + cudaMemcpy(h_data.data(), param.d_data_, sizeof(T) * param.number_of_elements_, cudaMemcpyDeviceToHost); return err; } + template cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc) { cudaError_t err = cudaMemcpy( vectorMatrixDest.d_data_, vectorMatrixSrc.d_data_, - sizeof(double) * vectorMatrixSrc.number_of_elements_, + sizeof(T) * vectorMatrixSrc.number_of_elements_, cudaMemcpyDeviceToDevice); return err; } @@ -77,6 +81,12 @@ namespace micm } // source code needs the instantiation of the template + template cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements); + template cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements); + template cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector& h_data); + template cudaError_t CopyToHost(CudaMatrixParam& param, std::vector& h_data); + template cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc); + template cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc); template cudaError_t FillCudaMatrix(CudaMatrixParam& param, double val); } // namespace cuda } // namespace micm diff --git a/test/unit/cuda/util/test_cuda_dense_matrix.cpp b/test/unit/cuda/util/test_cuda_dense_matrix.cpp index b68ebd437..86d5c538c 100644 --- a/test/unit/cuda/util/test_cuda_dense_matrix.cpp +++ b/test/unit/cuda/util/test_cuda_dense_matrix.cpp @@ -24,10 +24,10 @@ TEST(CudaDenseMatrix, DeviceMemCopy) std::size_t num_elements = h_vector.size(); CudaMatrixParam param; - micm::cuda::MallocVector(param, num_elements); - micm::cuda::CopyToDevice(param, h_vector); + micm::cuda::MallocVector(param, num_elements); + micm::cuda::CopyToDevice(param, h_vector); micm::cuda::SquareDriver(param); - micm::cuda::CopyToHost(param, h_vector); + micm::cuda::CopyToHost(param, h_vector); EXPECT_EQ(h_vector[0], 1 * 1); EXPECT_EQ(h_vector[1], 2 * 2);