diff --git a/include/micm/cuda/util/cuda_dense_matrix.hpp b/include/micm/cuda/util/cuda_dense_matrix.hpp
index 65bad87db..192807ff0 100644
--- a/include/micm/cuda/util/cuda_dense_matrix.hpp
+++ b/include/micm/cuda/util/cuda_dense_matrix.hpp
@@ -69,48 +69,35 @@ namespace micm
 
    public:
 
-    CudaDenseMatrix() requires(std::is_same_v<T, double>)
+    CudaDenseMatrix()
         : VectorMatrix<T, L>()
     {
       this->param_.number_of_grid_cells_ = 0;
       this->param_.number_of_elements_ = this->data_.size();
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
-    }
-    CudaDenseMatrix()
-        : VectorMatrix<T, L>()
-    {
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
     }
 
-    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim) requires(std::is_same_v<T, double>)
+    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim)
         : VectorMatrix<T, L>(x_dim, y_dim)
     {
       this->param_.number_of_elements_ = this->data_.size();
       this->param_.number_of_grid_cells_ = x_dim;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
-    }
-    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim)
-        : VectorMatrix<T, L>(x_dim, y_dim)
-    {
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
     }
 
-    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value) requires(std::is_same_v<T, double>)
+    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value)
         : VectorMatrix<T, L>(x_dim, y_dim, initial_value)
     {
       this->param_.number_of_elements_ = this->data_.size();
       this->param_.number_of_grid_cells_ = x_dim;
       if (this->param_.number_of_elements_ != 0)
       {
-        CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
+        CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
         Fill(initial_value);
       }
     }
 
-    CudaDenseMatrix(std::size_t x_dim, std::size_t y_dim, T initial_value)
-        : VectorMatrix<T, L>(x_dim, y_dim, initial_value)
-    {
-    } 
-    
-    CudaDenseMatrix(const std::vector<std::vector<T>> other) requires(std::is_same_v<T, double>)
+    CudaDenseMatrix(const std::vector<std::vector<T>> other)
         : VectorMatrix<T, L>(other)
     {
       this->param_.number_of_grid_cells_ = 0;
@@ -119,28 +106,18 @@ namespace micm
       {
         this->param_.number_of_elements_ += inner_vector.size();
       }
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
     }
 
-    CudaDenseMatrix(const std::vector<std::vector<T>> other)
-        : VectorMatrix<T, L>(other)
-    {
-    }
-
-    CudaDenseMatrix(const CudaDenseMatrix& other) requires(std::is_same_v<T, double>)
+    CudaDenseMatrix(const CudaDenseMatrix& other)
         : VectorMatrix<T, L>(other)
     {
       this->param_ = other.param_;
       this->param_.d_data_ = nullptr;
       this->param_.number_of_elements_ = other.param_.number_of_elements_;
       this->param_.number_of_grid_cells_ = other.param_.number_of_grid_cells_;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
-    }
-
-    CudaDenseMatrix(const CudaDenseMatrix& other)
-        : VectorMatrix<T, L>(other)
-    {
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice<T>(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
     }
 
     CudaDenseMatrix(CudaDenseMatrix&& other) noexcept
@@ -156,8 +133,8 @@ namespace micm
       if (this->param_.d_data_ != nullptr)
         CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree");
       this->param_ = other.param_;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->param_.number_of_elements_), "cudaMalloc");
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->param_.number_of_elements_), "cudaMalloc");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice<T>(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
       return *this;
     }
 
@@ -183,14 +160,12 @@ namespace micm
 
     void CopyToDevice()
     {
-      static_assert(std::is_same_v<T, double>);
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDevice<T>(this->param_, this->data_), "cudaMemcpyHostToDevice");
     }
 
     void CopyToHost()
     {
-      static_assert(std::is_same_v<T, double>);
-      CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToHost<T>(this->param_, this->data_), "cudaMemcpyDeviceToHost");
     }
 
     CudaMatrixParam AsDeviceParam() const
@@ -227,7 +202,7 @@ namespace micm
       {
         throw std::runtime_error("Both CUDA dense matrices must have the same size.");
       }
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice<T>(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
     }
 
     /// @brief Set every matrix element to a given value on the GPU
@@ -239,7 +214,7 @@ namespace micm
         // the cudaMemset function only works for integer types and is an asynchronous function:
         // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a
         CHECK_CUDA_ERROR(
-            cudaMemset(this->param_.d_data_, val, sizeof(double) * this->param_.number_of_elements_), "cudaMemset");
+            cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset");
       }
       else
       {
diff --git a/include/micm/cuda/util/cuda_matrix.cuh b/include/micm/cuda/util/cuda_matrix.cuh
index f68efef64..61bfff6e5 100644
--- a/include/micm/cuda/util/cuda_matrix.cuh
+++ b/include/micm/cuda/util/cuda_matrix.cuh
@@ -16,6 +16,7 @@ namespace micm
     /// @param vectorMatrix Reference to struct containing information about allocated memory
     /// @param num_elements Requested number of elements to allocate
     /// @returns Error code from allocating data on the device, if any
+    template<typename T>
     cudaError_t MallocVector(CudaMatrixParam& vectorMatrix, std::size_t num_elements);
 
     /// @brief Free memory allocated on device
@@ -27,18 +28,21 @@ namespace micm
     /// @param vectorMatrix Struct containing allocated device memory
     /// @param h_data Host data to copy from
     /// @returns Error code from copying to device from the host, if any
-    cudaError_t CopyToDevice(CudaMatrixParam& vectorMatrix, std::vector<double>& h_data);
+    template<typename T>
+    cudaError_t CopyToDevice(CudaMatrixParam& vectorMatrix, std::vector<T>& h_data);
 
     /// @brief Copies data from the device to the host
     /// @param vectorMatrix Struct containing allocated device memory
     /// @param h_data Host data to copy data to
     /// @returns Error code from copying from the device to the host, if any
-    cudaError_t CopyToHost(CudaMatrixParam& vectorMatrix, std::vector<double>& h_data);
+    template<typename T>
+    cudaError_t CopyToHost(CudaMatrixParam& vectorMatrix, std::vector<T>& h_data);
 
     /// @brief Copies data to the destination device memory block from the source device memory block
     /// @param vectorMatrixDest Struct containing allocated destination device memory to copy to
     /// @param vectorMatrixSrc Struct containing allocated source device memory to copy from
     /// @returns Error code from copying to destination device memory from source device memory, if any
+    template<typename T>
     cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc);
 
     /// @brief Fills a CUDA matrix with a specified value
diff --git a/include/micm/cuda/util/cuda_sparse_matrix.hpp b/include/micm/cuda/util/cuda_sparse_matrix.hpp
index 25a92d4b9..25eb07cfd 100644
--- a/include/micm/cuda/util/cuda_sparse_matrix.hpp
+++ b/include/micm/cuda/util/cuda_sparse_matrix.hpp
@@ -32,47 +32,28 @@ namespace micm
       this->param_.d_data_ = nullptr;
     }
 
-    CudaSparseMatrix(const SparseMatrixBuilder<T, OrderingPolicy>& builder) requires(std::is_same_v<T, double>)
-        : SparseMatrix<T, OrderingPolicy>(builder)
-    {
-      this->param_.number_of_grid_cells_ = this->number_of_blocks_;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc");
-    }
     CudaSparseMatrix(const SparseMatrixBuilder<T, OrderingPolicy>& builder)
         : SparseMatrix<T, OrderingPolicy>(builder)
     {
-      this->param_.d_data_ = nullptr;
-    }
-
-    CudaSparseMatrix<T, OrderingPolicy>& operator=(const SparseMatrixBuilder<T, OrderingPolicy>& builder) requires(
-        std::is_same_v<T, double>)
-    {
-      SparseMatrix<T, OrderingPolicy>::operator=(builder);
       this->param_.number_of_grid_cells_ = this->number_of_blocks_;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc");
-      return *this;
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->data_.size()), "cudaMalloc");
     }
 
     CudaSparseMatrix<T, OrderingPolicy>& operator=(const SparseMatrixBuilder<T, OrderingPolicy>& builder)
     {
       SparseMatrix<T, OrderingPolicy>::operator=(builder);
-      this->param_.d_data_ = nullptr;
+      this->param_.number_of_grid_cells_ = this->number_of_blocks_;
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->data_.size()), "cudaMalloc");
       return *this;
     }
 
-    CudaSparseMatrix(const CudaSparseMatrix& other) requires(std::is_same_v<T, double>)
-        : SparseMatrix<T, OrderingPolicy>(other)
-    {
-      this->param_ = other.param_;
-      this->param_.d_data_ = nullptr;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc");
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
-    }
-
     CudaSparseMatrix(const CudaSparseMatrix& other)
         : SparseMatrix<T, OrderingPolicy>(other)
     {
+      this->param_ = other.param_;
       this->param_.d_data_ = nullptr;
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->data_.size()), "cudaMalloc");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice<T>(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
     }
 
     CudaSparseMatrix(CudaSparseMatrix&& other) noexcept
@@ -87,8 +68,8 @@ namespace micm
       SparseMatrix<T, OrderingPolicy>::operator=(other);
       this->param_ = other.param_;
       this->param_.d_data_ = nullptr;
-      CHECK_CUDA_ERROR(micm::cuda::MallocVector(this->param_, this->data_.size()), "cudaMalloc");
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
+      CHECK_CUDA_ERROR(micm::cuda::MallocVector<T>(this->param_, this->data_.size()), "cudaMalloc");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDeviceFromDevice<T>(this->param_, other.param_), "cudaMemcpyDeviceToDevice");
       return *this;
     }
 
@@ -102,27 +83,20 @@ namespace micm
       return *this;
     }
 
-    ~CudaSparseMatrix() requires(std::is_same_v<T, double>)
-    {
-      CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree");
-      this->param_.d_data_ = nullptr;
-    }
-
     ~CudaSparseMatrix()
     {
+      CHECK_CUDA_ERROR(micm::cuda::FreeVector(this->param_), "cudaFree");
       this->param_.d_data_ = nullptr;
     }
 
     void CopyToDevice()
     {
-      static_assert(std::is_same_v<T, double>);
-      CHECK_CUDA_ERROR(micm::cuda::CopyToDevice(this->param_, this->data_), "cudaMemcpyHostToDevice");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToDevice<T>(this->param_, this->data_), "cudaMemcpyHostToDevice");
     }
 
     void CopyToHost()
     {
-      static_assert(std::is_same_v<T, double>);
-      CHECK_CUDA_ERROR(micm::cuda::CopyToHost(this->param_, this->data_), "cudaMemcpyDeviceToHost");
+      CHECK_CUDA_ERROR(micm::cuda::CopyToHost<T>(this->param_, this->data_), "cudaMemcpyDeviceToHost");
     }
 
     /// @brief Set every matrix element to a given value on the GPU
@@ -134,7 +108,7 @@ namespace micm
         // the cudaMemset function only works for integer types and is an asynchronous function:
         // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a
         CHECK_CUDA_ERROR(
-            cudaMemset(this->param_.d_data_, val, sizeof(double) * this->param_.number_of_elements_), "cudaMemset");
+            cudaMemset(this->param_.d_data_, val, sizeof(T) * this->param_.number_of_elements_), "cudaMemset");
       }
       else
       {
diff --git a/src/util/cuda_matrix.cu b/src/util/cuda_matrix.cu
index cc0e373a0..0f79bd133 100644
--- a/src/util/cuda_matrix.cu
+++ b/src/util/cuda_matrix.cu
@@ -12,10 +12,11 @@ namespace micm
 {
   namespace cuda
   {
+    template<typename T>
     cudaError_t MallocVector(CudaMatrixParam& param, std::size_t number_of_elements)
     {
       param.number_of_elements_ = number_of_elements;
-      cudaError_t err = cudaMalloc(&(param.d_data_), sizeof(double) * number_of_elements);
+      cudaError_t err = cudaMalloc(&(param.d_data_), sizeof(T) * number_of_elements);
       return err;
     }
 
@@ -32,27 +33,30 @@ namespace micm
       return err;
     }
 
-    cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector<double>& h_data)
+    template<typename T>
+    cudaError_t CopyToDevice(CudaMatrixParam& param, std::vector<T>& h_data)
     {
       cudaError_t err =
-          cudaMemcpy(param.d_data_, h_data.data(), sizeof(double) * param.number_of_elements_, cudaMemcpyHostToDevice);
+          cudaMemcpy(param.d_data_, h_data.data(), sizeof(T) * param.number_of_elements_, cudaMemcpyHostToDevice);
       return err;
     }
 
-    cudaError_t CopyToHost(CudaMatrixParam& param, std::vector<double>& h_data)
+    template<typename T>
+    cudaError_t CopyToHost(CudaMatrixParam& param, std::vector<T>& h_data)
     {
       cudaDeviceSynchronize();
       cudaError_t err =
-          cudaMemcpy(h_data.data(), param.d_data_, sizeof(double) * param.number_of_elements_, cudaMemcpyDeviceToHost);
+          cudaMemcpy(h_data.data(), param.d_data_, sizeof(T) * param.number_of_elements_, cudaMemcpyDeviceToHost);
       return err;
     }
 
+    template<typename T>
     cudaError_t CopyToDeviceFromDevice(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc)
     {
       cudaError_t err = cudaMemcpy(
           vectorMatrixDest.d_data_,
           vectorMatrixSrc.d_data_,
-          sizeof(double) * vectorMatrixSrc.number_of_elements_,
+          sizeof(T) * vectorMatrixSrc.number_of_elements_,
           cudaMemcpyDeviceToDevice);
       return err;
     }
@@ -77,6 +81,12 @@ namespace micm
     }
 
     // source code needs the instantiation of the template
+    template cudaError_t MallocVector<double>(CudaMatrixParam& param, std::size_t number_of_elements);
+    template cudaError_t MallocVector<int>(CudaMatrixParam& param, std::size_t number_of_elements);
+    template cudaError_t CopyToDevice<double>(CudaMatrixParam& param, std::vector<double>& h_data);
+    template cudaError_t CopyToHost<double>(CudaMatrixParam& param, std::vector<double>& h_data);
+    template cudaError_t CopyToDeviceFromDevice<double>(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc);
+    template cudaError_t CopyToDeviceFromDevice<int>(CudaMatrixParam& vectorMatrixDest, const CudaMatrixParam& vectorMatrixSrc);
     template cudaError_t FillCudaMatrix<double>(CudaMatrixParam& param, double val);
   }  // namespace cuda
 }  // namespace micm
diff --git a/test/unit/cuda/util/test_cuda_dense_matrix.cpp b/test/unit/cuda/util/test_cuda_dense_matrix.cpp
index b68ebd437..86d5c538c 100644
--- a/test/unit/cuda/util/test_cuda_dense_matrix.cpp
+++ b/test/unit/cuda/util/test_cuda_dense_matrix.cpp
@@ -24,10 +24,10 @@ TEST(CudaDenseMatrix, DeviceMemCopy)
   std::size_t num_elements = h_vector.size();
   CudaMatrixParam param;
 
-  micm::cuda::MallocVector(param, num_elements);
-  micm::cuda::CopyToDevice(param, h_vector);
+  micm::cuda::MallocVector<double>(param, num_elements);
+  micm::cuda::CopyToDevice<double>(param, h_vector);
   micm::cuda::SquareDriver(param);
-  micm::cuda::CopyToHost(param, h_vector);
+  micm::cuda::CopyToHost<double>(param, h_vector);
 
   EXPECT_EQ(h_vector[0], 1 * 1);
   EXPECT_EQ(h_vector[1], 2 * 2);