Skip to content

Commit

Permalink
Merge pull request #8 from 3MAH/openMP
Browse files Browse the repository at this point in the history
Open mp support for launch_umat (simcoon mat for fedoo)
  • Loading branch information
chemiskyy authored Jun 13, 2024
2 parents 654775d + fd17080 commit edbd126
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 74 deletions.
35 changes: 7 additions & 28 deletions simcoon-python-builder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,6 @@ set(LIBRARY_OUTPUT_PATH lib/${CMAKE_BUILD_TYPE})

find_package(Python3 COMPONENTS Interpreter Development NumPy)

if (Python3_FOUND)
if(MSVC)
set(CMAKE_FIND_FRAMEWORK NEVER)
FIND_PACKAGE(Boost REQUIRED)
#FIND_PACKAGE(PythonInterp 3)
#FIND_PACKAGE(PythonLibs 3 REQUIRED)
#FIND_PACKAGE(NumPy REQUIRED)
elseif (UNIX AND NOT APPLE)
FIND_PACKAGE(Boost REQUIRED)
FIND_PACKAGE(PythonInterp 3)
FIND_PACKAGE(PythonLibs 3 REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
else()
set(CMAKE_FIND_FRAMEWORK NEVER)
FIND_PACKAGE(Boost REQUIRED)
# FIND_PACKAGE(PythonInterp 3)
# FIND_PACKAGE(PythonLibs 3 REQUIRED)
# FIND_PACKAGE(NumPy REQUIRED)
endif()
else()
message("Python not found")
endif()

#Inclusion of Armadillo
if (MSVC)
find_package(pybind11 REQUIRED)
Expand All @@ -79,6 +56,7 @@ else()
find_package(carma CONFIG REQUIRED)
endif()

find_package(OpenMP)
find_package(simcoon REQUIRED)

#${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}
Expand Down Expand Up @@ -114,7 +92,7 @@ endif()

if (MSVC)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++latest /Drestrict= /Y-") #\Y- to disable precompile header (don't work for an unknown reason)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++latest /Drestrict= /openmp:llvm /Y-") #\Y- to disable precompile header (don't work for an unknown reason)
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++latest /Drestrict=")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") #do nothing but kept anyway if required to add some options
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}") #do nothing but kept anyway if required to add some options
Expand Down Expand Up @@ -145,10 +123,6 @@ else()
endif()
endif()

if(OPENMP_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

#Inclusion of public headers
include_directories(include)

Expand Down Expand Up @@ -184,6 +158,11 @@ add_test(NAME simmit_test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test/sim
#Add the files to the lib
pybind11_add_module(simmit ${source_files_simmit})
#Wrapper library set_target properties

if(OpenMP_CXX_FOUND)
target_link_libraries(simmit PUBLIC OpenMP::OpenMP_CXX)
endif()

if (MSVC)
set_target_properties(simmit PROPERTIES PREFIX "" SUFFIX ".pyd")
else()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include <carma>
#include <armadillo>

#ifdef _OPENMP
#include <omp.h>
#endif

#include <simcoon/Simulation/Maths/rotation.hpp>
#include <simcoon/Continuum_mechanics/Functions/objective_rates.hpp>
#include <simcoon/python_wrappers/Libraries/Continuum_mechanics/objective_rates.hpp>
Expand Down Expand Up @@ -75,8 +79,8 @@ py::tuple objective_rate(const std::string& corate_name, const py::array_t<doubl
throw std::invalid_argument("the number of dim of F1 should be the same as F0");
}

mat F0_cpp = carma::arr_to_mat(F0);
mat F1_cpp = carma::arr_to_mat(F1);
mat F0_cpp = carma::arr_to_mat_view(F0);
mat F1_cpp = carma::arr_to_mat_view(F1);
mat DR = zeros(3,3);
mat D = zeros(3,3);
mat Omega = zeros(3,3);
Expand Down Expand Up @@ -129,7 +133,11 @@ py::tuple objective_rate(const std::string& corate_name, const py::array_t<doubl
cube D(3,3,nb_points);
cube N_1(3,3,nb_points);
cube N_2(3,3,nb_points);
cube Omega = zeros(3,3, nb_points);
cube Omega = zeros(3,3, nb_points);
mat de;
if(return_de) de.set_size(6,nb_points);
mat DR_N = zeros(3,3);
mat I = eye(3,3);

if (F0.ndim() == 2) {
mat vec_F0 = carma::arr_to_mat_view(F0);
Expand All @@ -138,10 +146,20 @@ py::tuple objective_rate(const std::string& corate_name, const py::array_t<doubl
switch (corate) {
case 0: case 1: case 2: {
corate_function(DR.slice(pt), D.slice(pt), Omega.slice(pt), DTime, vec_F0, F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
}
break;
}
case 3: {
corate_function_2(DR.slice(pt), N_1.slice(pt), N_2.slice(pt), D.slice(pt), Omega.slice(pt), DTime, vec_F0, F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
DR_N = (inv(I-0.5*DTime*(N_1.slice(pt)-N_2.slice(pt))))*(I+0.5*DTime*(N_1.slice(pt)-N_2.slice(pt)));
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
de_col = simcoon::rotate_strain(de_col, DR_N);
}
break;
}
}
Expand All @@ -151,57 +169,79 @@ py::tuple objective_rate(const std::string& corate_name, const py::array_t<doubl
cube F0_cpp = carma::arr_to_cube_view(F0);
if (F0_cpp.n_slices==1) {
mat vec_F0 = F0_cpp.slice(0);

int max_threads = omp_get_max_threads();
omp_set_num_threads(4);
py::gil_scoped_release release;

#ifdef _OPENMP
omp_set_max_active_levels(3);
#pragma omp parallel for shared(DR, D, Omega, F1_cpp)
#endif
for (int pt = 0; pt < nb_points; pt++) {

switch (corate) {
case 0: case 1: case 2: {
corate_function(DR.slice(pt), D.slice(pt), Omega.slice(pt), DTime, vec_F0, F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
}
break;
}
case 3: {
corate_function_2(DR.slice(pt), N_1.slice(pt), N_2.slice(pt), D.slice(pt), Omega.slice(pt), DTime, vec_F0, F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
DR_N = (inv(I-0.5*DTime*(N_1.slice(pt)-N_2.slice(pt))))*(I+0.5*DTime*(N_1.slice(pt)-N_2.slice(pt)));
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
de_col = simcoon::rotate_strain(de_col, DR_N);
}
break;
}
}
}
py::gil_scoped_acquire acquire;
omp_set_num_threads(max_threads);
}
else {

int max_threads = omp_get_max_threads();
omp_set_num_threads(4);
py::gil_scoped_release release;

#ifdef _OPENMP
omp_set_max_active_levels(3);
#pragma omp parallel for shared(DR, D, Omega, F0_cpp, F1_cpp)
#endif
for (int pt = 0; pt < nb_points; pt++) {

switch (corate) {
case 0: case 1: case 2: {
corate_function(DR.slice(pt), D.slice(pt), Omega.slice(pt), DTime, F0_cpp.slice(pt), F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
}
break;
}
case 3: {
corate_function_2(DR.slice(pt), N_1.slice(pt), N_2.slice(pt), D.slice(pt), Omega.slice(pt), DTime, F0_cpp.slice(pt), F1_cpp.slice(pt));
if (return_de) {
vec de_col = de.unsafe_col(pt);
DR_N = (inv(I-0.5*DTime*(N_1.slice(pt)-N_2.slice(pt))))*(I+0.5*DTime*(N_1.slice(pt)-N_2.slice(pt)));
de_col = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
de_col = simcoon::rotate_strain(de.col(pt), DR_N);
}
break;
}
}
}
py::gil_scoped_acquire acquire;
omp_set_num_threads(max_threads);
}
}
if (return_de){
//also return the strain increment
mat de(6,nb_points);
for (int pt = 0; pt < nb_points; pt++) {

switch (corate) {
case 0: case 1: case 2: {
//could use simcoon::Delta_log_strain(D, Omega, DTime) but it would recompute DR (waste of time).
//vec de = simcoon::t2v_strain(simcoon::Delta_log_strain(D, Omega, DTime));
de.col(pt) = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
break;
}
case 3: {
mat I = eye(3,3);
mat DR_N = (inv(I-0.5*DTime*(N_1.slice(pt)-N_2.slice(pt))))*(I+0.5*DTime*(N_1.slice(pt)-N_2.slice(pt)));
de.col(pt) = (0.5*DTime) * simcoon::t2v_strain(D.slice(pt)+(DR.slice(pt)*D.slice(pt)*DR.slice(pt).t()));
de.col(pt) = simcoon::rotate_strain(de.col(pt), DR_N);
break;
}
}
}
if (return_de){
return py::make_tuple(carma::mat_to_arr(de, false), carma::cube_to_arr(D, false), carma::cube_to_arr(DR, false), carma::cube_to_arr(Omega, false));
}
else{
Expand Down Expand Up @@ -250,11 +290,21 @@ py::array_t<double> Lt_convert(const py::array_t<double> &Lt, const py::array_t<
throw std::invalid_argument("the number of dim of Lt, F and stress are not consistent");
}

mat F_cpp = carma::arr_to_mat(F);
mat Lt_cpp = carma::arr_to_mat(Lt);
vec stress_cpp = carma::arr_to_col(stress);
mat F_cpp = carma::arr_to_mat_view(F);
mat Lt_cpp = carma::arr_to_mat_view(Lt);
vec stress_cpp = carma::arr_to_col_view(stress);
mat Lt_converted(6,6);

mat Lt_converted = convert_function(Lt_cpp, F_cpp, stress_cpp);
switch (select) {
case 0: case 1: case 2: {
Lt_converted = convert_function(Lt_cpp, F_cpp, stress_cpp);
break;
}
case 3: {
Lt_converted = convert_function2(F_cpp, det(F_cpp));
break;
}
}
return carma::mat_to_arr(Lt_converted,false);
}
else if (Lt.ndim() == 3) {
Expand All @@ -266,6 +316,15 @@ py::array_t<double> Lt_convert(const py::array_t<double> &Lt, const py::array_t<

mat stress_pt;

int max_threads = omp_get_max_threads();
omp_set_num_threads(4);
py::gil_scoped_release release;

#ifdef _OPENMP
omp_set_max_active_levels(3);
#pragma omp parallel for shared(Lt_converted)
#endif

for (int pt = 0; pt < nb_points; pt++) {
//vec stress_pt = stress_cpp.unsafe_col(pt);
stress_pt = simcoon::v2t_stress(stress_cpp.col(pt));
Expand All @@ -280,6 +339,8 @@ py::array_t<double> Lt_convert(const py::array_t<double> &Lt, const py::array_t<
}
}
}
py::gil_scoped_acquire acquire;
omp_set_num_threads(max_threads);
return carma::cube_to_arr(Lt_converted,false);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#include <pybind11/embed.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#include <carma>
#include <armadillo>
#include <carma>

#ifdef _OPENMP
#include <omp.h>
#endif

#include <simcoon/python_wrappers/Libraries/Continuum_mechanics/umat.hpp>

Expand Down Expand Up @@ -52,11 +57,12 @@ namespace simpy {

py::tuple launch_umat(const std::string& umat_name_py, const py::array_t<double> &etot_py, const py::array_t<double> &Detot_py, const py::array_t<double> &sigma_py, const py::array_t<double> &DR_py, const py::array_t<double> &props_py, const py::array_t<double> &statev_py, const float Time, const float DTime, const py::array_t<double> &Wm_py, const std::optional<py::array_t<double>> &T_py){
//Get the id of umat

std::map<string, int> list_umat;
list_umat = { {"UMEXT",0},{"UMABA",1},{"ELISO",2},{"ELIST",3},{"ELORT",4},{"EPICP",5},{"EPKCP",6},{"EPCHA",7},{"SMAUT",8},{"LLDM0",9},{"ZENER",10},{"ZENNK",11},{"PRONK",12},{"EPHIC",17},{"EPHIN",18},{"SMAMO",19},{"SMAMC",20},{"MIHEN",100},{"MIMTN",101},{"MISCN",103},{"MIPLN",104} };
int id_umat = list_umat[umat_name_py];
int arguments_type; //depends on the argument used in the umat

void (*umat_function)(const arma::vec &, const arma::vec &, arma::vec &, arma::mat &, arma::mat &, arma::vec &, const arma::mat &, const int &, const arma::vec &, const int &, arma::vec &, const double &, const double &,const double &,const double &, double &, double &, double &, double &, const int &, const int &, const bool &, const int &, double &);
void (*umat_function_2)(const arma::vec &, const arma::vec &, arma::vec &, arma::mat &, const arma::mat &, const int &, const arma::vec &, const int &, arma::vec &, const double &, const double &,const double &,const double &, double &, double &, double &, double &, const int &, const int &, const bool &, double &);
void (*umat_function_3)(const arma::vec &, const arma::vec &, arma::vec &, arma::mat &, arma::mat &, const arma::mat &, const int &, const arma::vec &, const int &, arma::vec &, const double &, const double &,const double &,const double &, double &, double &, double &, double &, const int &, const int &, const bool &, double &);
Expand Down Expand Up @@ -220,46 +226,68 @@ namespace simpy {
else {
use_temp = false;
}
mat etot = carma::arr_to_mat_view(etot_py);
int nb_points = etot.n_cols; //number of material points
mat Detot = carma::arr_to_mat_view(Detot_py);
mat list_sigma = carma::arr_to_mat(sigma_py); //copy data because values are changed by the umat and returned to python

mat list_etot = carma::arr_to_mat_view(etot_py);
int nb_points = list_etot.n_cols; //number of material points
mat list_Detot = carma::arr_to_mat_view(Detot_py);
mat list_sigma = carma::arr_to_mat(std::move(sigma_py)); //copy data because values are changed by the umat and returned to python
cube DR = carma::arr_to_cube_view(DR_py);
mat list_props = carma::arr_to_mat_view(props_py);
mat list_statev = carma::arr_to_mat(statev_py); //copy data because values are changed by the umat and returned to python
mat Wm = carma::arr_to_mat(Wm_py); //copy data because values are changed by the umat and returned to python
mat list_statev = carma::arr_to_mat(std::move(statev_py)); //copy data because values are changed by the umat and returned to python
mat list_Wm = carma::arr_to_mat(std::move(Wm_py)); //copy data because values are changed by the umat and returned to python
cube L(ncomp, ncomp, nb_points);
cube Lt(ncomp, ncomp, nb_points);
vec sigma_in = zeros(1); //not used
int nprops = list_props.n_rows;
int nstatev = list_statev.n_rows;

vec props(ncomp);


int max_threads = omp_get_max_threads();
omp_set_num_threads(4);
py::gil_scoped_release release;

#ifdef _OPENMP
omp_set_max_active_levels(3);
#pragma omp parallel for shared(Lt, L, DR) private(props)
#endif
for (int pt = 0; pt < nb_points; pt++) {

//if (use_temp) T = list_T(pt);
if (pt < list_props.n_cols) props = list_props.col(pt); //if list_props has only one element, we keep only this one (assuming homogeneous material)
if (pt < list_props.n_cols) {
props = list_props.col(pt); //if list_props has only one element, we keep only this one (assuming homogeneous material)
}
else {
props = list_props.col(0);
}
vec statev = list_statev.unsafe_col(pt);
vec sigma = list_sigma.unsafe_col(pt);
if (use_temp) T=vec_T(pt);

vec etot = list_etot.unsafe_col(pt);
vec Detot = list_Detot.unsafe_col(pt);
vec Wm = list_Wm.unsafe_col(pt);

if (use_temp) T=vec_T(pt);

switch (arguments_type) {

case 1: {
umat_function(etot.col(pt), Detot.col(pt), sigma, Lt.slice(pt), L.slice(pt), sigma_in, DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0,pt), Wm(1,pt), Wm(2,pt), Wm(3,pt), ndi, nshr, start, solver_type, tnew_dt);
umat_function(etot, Detot, sigma, Lt.slice(pt), L.slice(pt), sigma_in, DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0), Wm(1), Wm(2), Wm(3), ndi, nshr, start, solver_type, tnew_dt);
break;
}
case 2: {
umat_function_2(etot.col(pt), Detot.col(pt), sigma, Lt.slice(pt), DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0,pt), Wm(1,pt), Wm(2,pt), Wm(3,pt), ndi, nshr, start, tnew_dt);
umat_function_2(etot, Detot, sigma, Lt.slice(pt), DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0), Wm(1), Wm(2), Wm(3), ndi, nshr, start, tnew_dt);
break;
}
case 3: {
umat_function_3(etot.col(pt), Detot.col(pt), sigma, Lt.slice(pt), L.slice(pt), DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0,pt), Wm(1,pt), Wm(2,pt), Wm(3,pt), ndi, nshr, start, tnew_dt);
umat_function_3(etot, Detot, sigma, Lt.slice(pt), L.slice(pt), DR.slice(pt), nprops, props, nstatev, statev, T, DT, Time, DTime, Wm(0), Wm(1), Wm(2), Wm(3), ndi, nshr, start, tnew_dt);
break;
}
}
}
return py::make_tuple(carma::mat_to_arr(list_sigma, false), carma::mat_to_arr(list_statev, false), carma::mat_to_arr(Wm, false), carma::cube_to_arr(Lt, false));
py::gil_scoped_acquire acquire;
omp_set_num_threads(max_threads);
return py::make_tuple(carma::mat_to_arr(list_sigma, false), carma::mat_to_arr(list_statev, false), carma::mat_to_arr(list_Wm, false), carma::cube_to_arr(Lt, false));
}
}
}
Expand Down
Loading

0 comments on commit edbd126

Please sign in to comment.