Skip to content

Commit

Permalink
Merge pull request #188 from NCAR/AddJacobianTerms_CUDA
Browse files Browse the repository at this point in the history
Add jacobian terms cuda
  • Loading branch information
K20shores authored Aug 21, 2023
2 parents 5155e7e + 430f41a commit d2fc692
Show file tree
Hide file tree
Showing 6 changed files with 340 additions and 104 deletions.
33 changes: 24 additions & 9 deletions include/micm/process/cuda_process_set.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,37 @@ namespace micm
{
namespace cuda
{
void AddForcingTerms_kernelSetup(
std::chrono::nanoseconds AddForcingTermsKernelDriver(
const double* rate_constants_data,
const double* state_variables_data,
double* forcing_data,
int ngrids,
int nrxns,
int nspecs,
size_t n_grids,
size_t n_reactions,
size_t n_species,
const size_t* number_of_reactants,
int number_of_reactants_size,
const size_t* reactant_ids,
int reactant_ids_size,
size_t reactant_ids_size,
const size_t* number_of_products,
int number_of_products_size,
const size_t* product_ids,
int product_ids_size,
size_t product_ids_size,
const double* yields,
int yields_size);
size_t yields_size);

std::chrono::nanoseconds AddJacobianTermsKernelDriver(
const double* rate_constants,
const double* state_variables,
size_t n_grids,
size_t n_reactions,
size_t n_species,
double* jacobian,
size_t jacobian_size,
const size_t* number_of_reactants,
const size_t* reactant_ids,
size_t reactant_ids_size,
const size_t* number_of_products,
const double* yields,
size_t yields_size,
const size_t* jacobian_flat_ids,
size_t jacobian_flat_ids_size);
} // namespace cuda
} // namespace micm
56 changes: 43 additions & 13 deletions include/micm/process/cuda_process_set.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

// Copyright (C) 2023 National Center for Atmospheric Research,
//
// SPDX-License-Identifier: Apache-2.0
Expand All @@ -10,9 +9,9 @@
# include <micm/process/cuda_process_set.cuh>
#endif

#ifdef USE_CUDA
namespace micm
{

/// @brief A GPU-based implementation of ProcessSet
class CudaProcessSet : public ProcessSet
{
Expand All @@ -23,14 +22,19 @@ namespace micm
template<template<class> class MatrixPolicy>
CudaProcessSet(const std::vector<Process>& processes, const State<MatrixPolicy>& state);

#ifdef USE_CUDA
template<template<class> typename MatrixPolicy>
requires VectorizableDense<MatrixPolicy<double>>
void AddForcingTerms(
std::chrono::nanoseconds AddForcingTerms(
const MatrixPolicy<double>& rate_constants,
const MatrixPolicy<double>& state_variables,
MatrixPolicy<double>& forcing) const;
#endif

template<template<class> class MatrixPolicy, template<class> class SparseMatrixPolicy>
requires VectorizableDense<MatrixPolicy<double>>&& VectorizableSparse<SparseMatrixPolicy<double>>
std::chrono::nanoseconds AddJacobianTerms(
const MatrixPolicy<double>& rate_constants,
const MatrixPolicy<double>& state_variables,
SparseMatrixPolicy<double>& jacobian)const;
};

template<template<class> class MatrixPolicy>
Expand All @@ -39,31 +43,57 @@ namespace micm
{
}

#ifdef USE_CUDA

template<template<class> class MatrixPolicy>
requires VectorizableDense<MatrixPolicy<double>>
inline void CudaProcessSet::AddForcingTerms(
inline std::chrono::nanoseconds CudaProcessSet::AddForcingTerms(
const MatrixPolicy<double>& rate_constants,
const MatrixPolicy<double>& state_variables,
MatrixPolicy<double>& forcing) const
{
micm::cuda::AddForcingTerms_kernelSetup(
std::chrono::nanoseconds kernel_duration =
micm::cuda::AddForcingTermsKernelDriver(
rate_constants.AsVector().data(),
state_variables.AsVector().data(),
forcing.AsVector().data(),
rate_constants.size(),
rate_constants[0].size(),
state_variables[0].size(),
number_of_reactants_.data(),
number_of_reactants_.size(),
reactant_ids_.data(),
reactant_ids_.size(),
number_of_products_.data(),
number_of_products_.size(),
number_of_products_.data(),
product_ids_.data(),
product_ids_.size(),
yields_.data(),
yields_.size());
return kernel_duration; //time performance of kernel function
}
#endif
} // namespace micm
template<template<class> class MatrixPolicy, template<class> class SparseMatrixPolicy>
requires VectorizableDense<MatrixPolicy<double>>&& VectorizableSparse<SparseMatrixPolicy<double>>
inline std::chrono::nanoseconds CudaProcessSet::AddJacobianTerms(
const MatrixPolicy<double>& rate_constants,
const MatrixPolicy<double>& state_variables,
SparseMatrixPolicy<double>& jacobian)const
{
std::chrono::nanoseconds kernel_duration =
micm::cuda::AddJacobianTermsKernelDriver(
rate_constants.AsVector().data(),
state_variables.AsVector().data(),
rate_constants.size(), //n_grids
rate_constants[0].size(), //n_reactions
state_variables[0].size(), //n_species
jacobian.AsVector().data(),
jacobian.AsVector().size(),
number_of_reactants_.data(),
reactant_ids_.data(),
reactant_ids_.size(),
number_of_products_.data(),
yields_.data(),
yields_.size(),
jacobian_flat_ids_.data(),
jacobian_flat_ids_.size());
return kernel_duration; //time performance of kernel function
}
} // namespace micm
#endif
12 changes: 4 additions & 8 deletions include/micm/process/process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace micm
std::vector<std::size_t> product_ids_;
std::vector<double> yields_;
std::vector<std::size_t> jacobian_flat_ids_;

public:
/// @brief Default constructor
ProcessSet() = default;
Expand Down Expand Up @@ -231,16 +231,14 @@ namespace micm
const MatrixPolicy<double>& state_variables,
SparseMatrixPolicy<double>& jacobian) const
{
// cell_jacobian is an iterator -> update after each row
auto cell_jacobian = jacobian.AsVector().begin();

// loop over grid cells
for (std::size_t i_cell = 0; i_cell < state_variables.size(); ++i_cell)
{
auto cell_rate_constants = rate_constants[i_cell]; // rate of every reaction in a grid
auto cell_state = state_variables[i_cell]; // state of every specie in a grid
auto cell_rate_constants = rate_constants[i_cell];
auto cell_state = state_variables[i_cell];

// every grid starts with every react_id, yield and flat_id
auto react_id = reactant_ids_.begin();
auto yield = yields_.begin();
auto flat_id = jacobian_flat_ids_.begin();
Expand All @@ -260,10 +258,8 @@ namespace micm
d_rate_d_ind *= cell_state[react_id[i_react]];
}
for (std::size_t i_dep = 0; i_dep < number_of_reactants_[i_rxn]; ++i_dep)

cell_jacobian[*(flat_id++)] -= d_rate_d_ind;
for (std::size_t i_dep = 0; i_dep < number_of_products_[i_rxn]; ++i_dep)
// flat_id (iterator) is not reset from previous loop
cell_jacobian[*(flat_id++)] += yield[i_dep] * d_rate_d_ind;
}
react_id += number_of_reactants_[i_rxn];
Expand Down Expand Up @@ -294,6 +290,7 @@ namespace micm
std::size_t offset_rc = i_group * rate_constants.GroupSize();
std::size_t offset_state = i_group * state_variables.GroupSize();
std::size_t offset_jacobian = i_group * jacobian.GroupSize(jacobian.FlatBlockSize());

auto flat_id = jacobian_flat_ids_.begin();
for (std::size_t i_rxn = 0; i_rxn < number_of_reactants_.size(); ++i_rxn)
{
Expand Down Expand Up @@ -327,5 +324,4 @@ namespace micm
}
}
}

} // namespace micm
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ if(ENABLE_CUDA)

add_library(micm_cuda)
add_library(musica::micm_cuda ALIAS micm_cuda)

set(CMAKE_CUDA_ARCHITECTURES 80)
target_link_libraries(micm_cuda
PRIVATE micm
)
Expand Down
Loading

0 comments on commit d2fc692

Please sign in to comment.