From 0574de57a511b4521b2826407007a6b92ba33a6e Mon Sep 17 00:00:00 2001 From: Haavard Rue Date: Fri, 27 Sep 2024 17:37:17 +0300 Subject: [PATCH] Use ARMPL sparse-vector-vector inner product --- gmrflib/dot.c | 12 +++++++++++- gmrflib/dot.h | 7 +++++++ gmrflib/idxval.c | 24 ++++++++++++++++++++++-- gmrflib/idxval.h | 10 ++++++++++ inlaprog/src/inla-parse.c | 25 +++++++++++++++++++++++++ inlaprog/src/inla.c | 6 +++--- 6 files changed, 78 insertions(+), 6 deletions(-) diff --git a/gmrflib/dot.c b/gmrflib/dot.c index 0aeb97201..10e7d0234 100644 --- a/gmrflib/dot.c +++ b/gmrflib/dot.c @@ -35,7 +35,7 @@ double GMRFLib_dot_product(GMRFLib_idxval_tp *__restrict ELM_, double *__restrict ARR_) { if (ELM_->dot_product_func) { -#if !defined(INLA_WITH_MKL) +#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL) /* the cpu_gain accumulation below is compiled only when neither MKL nor ARMPL is enabled */ if (GMRFLib_dot_product_gain >= 0.0) { _Pragma("omp atomic") GMRFLib_dot_product_gain += ELM_->cpu_gain; @@ -205,3 +205,13 @@ double GMRFLib_ddot_idx_mkl(int n, double *__restrict v, double *__restrict a, i } #endif /* if defined(INLA_WITH_MKL) */ + +#if defined(INLA_WITH_ARMPL) /* ARMPL builds only: inner product of the ARMPL sparse vector ELM_->spvec with the array ARR_ */ +double GMRFLib_dot_product_serial_armpl(GMRFLib_idxval_tp *__restrict ELM_, double *__restrict ARR_) +{ + double res = 0.0; /* passed by address to armpl_spdot_exec_d, then returned */ + armpl_status_t info = armpl_spdot_exec_d(ELM_->spvec, ARR_, &res); /* ELM_->spvec is used as-is; nothing here checks that it was created */ + assert(info == ARMPL_STATUS_SUCCESS); /* NOTE(review): this is the only status check; if assert is compiled out (NDEBUG) a failed call goes unnoticed — confirm that is acceptable */ + return (res); +} +#endif /* if defined(INLA_WITH_ARMPL) */ diff --git a/gmrflib/dot.h b/gmrflib/dot.h index 9b5135b64..2455775c9 100644 --- a/gmrflib/dot.h +++ b/gmrflib/dot.h @@ -52,7 +52,9 @@ #endif __BEGIN_DECLS + #include "GMRFLib/GMRFLibP.h" + double GMRFLib_ddot(int n, double *x, double *y); double GMRFLib_ddot_idx(int n, double *v, double *a, int *idx); double GMRFLib_ddot_idx_mkl(int n, double *v, double *a, int *idx); @@ -114,5 +116,10 @@ void GMRFLib_chose_threshold_ddot(void); #define GMRFLib_dot_product_INLINE_ADDTO(ans_, v_, a_) ans_ += GMRFLib_dot_product(v_, a_) #endif +#if defined(INLA_WITH_ARMPL) /* the ARMPL prototype is declared only in ARMPL builds */ +#include "armpl_sparse.h" /* presumably declares the armpl_* sparse-vector types and functions — confirm */ +double GMRFLib_dot_product_serial_armpl(GMRFLib_idxval_tp 
*__restrict ELM_, double *__restrict ARR_); +#endif /* if defined(INLA_WITH_ARMPL) */ + __END_DECLS #endif diff --git a/gmrflib/idxval.c b/gmrflib/idxval.c index ee44edc10..619a28d86 100644 --- a/gmrflib/idxval.c +++ b/gmrflib/idxval.c @@ -43,6 +43,10 @@ #include "GMRFLib/GMRFLibP.h" #include "GMRFLib/hashP.h" +#if defined(INLA_WITH_ARMPL) +#include "armpl_sparse.h" +#endif /* if defined(INLA_WITH_ARMPL) */ + #define IDX_ALLOC_INITIAL 8 #define IDX_ALLOC_ADD 128 #define IDX_ALLOC_NDIV 4 @@ -553,14 +557,22 @@ int GMRFLib_idxval_nsort_x_core(GMRFLib_idxval_tp *h, double *x, int prepare, in h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial_mkl; h->cpu_gain = 0.0; return GMRFLib_SUCCESS; -#else +#endif +#if defined(INLA_WITH_ARMPL) /* ARMPL builds: always use the ARMPL sparse-vector dot product and return early */ + armpl_status_t info = armpl_spvec_create_d(&(h->spvec), 0, h->idx[h->n - 1] + 1, h->n, h->idx, h->val, 0); /* args: handle, index base 0, vector length, nnz, indices, values, flags. The length must exceed the largest 0-based index, hence idx[n-1] + 1. Assumes h->n > 0 and idx sorted ascending so idx[n-1] is the maximum — TODO confirm. NOTE(review): a spvec created by an earlier call is not destroyed first — confirm this path runs at most once per object */ + assert(info == ARMPL_STATUS_SUCCESS); /* NOTE(review): no status check remains if assert is compiled out (NDEBUG) */ + h->spvec_in_use = 1; /* GMRFLib_idxval_free() destroys spvec only when this flag is set */ + h->preference = IDXVAL_SERIAL_ARMPL; + h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial_armpl; + h->cpu_gain = 0.0; + return GMRFLib_SUCCESS; +#endif /* if defined(INLA_WITH_ARMPL) */ if (!prepare || !GMRFLib_internal_opt) { h->preference = IDXVAL_SERIAL_MKL; h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial; h->cpu_gain = 0.0; return GMRFLib_SUCCESS; } -#endif // an upper bound for the number of groups for memory allocation int ng = 1; @@ -1175,6 +1187,14 @@ int GMRFLib_idxval_free(GMRFLib_idxval_tp *hold) if (hold->g_mem) { Free(hold->g_mem); } +#if defined(INLA_WITH_ARMPL) /* release the ARMPL sparse-vector handle, if one was created, before freeing hold */ + if (hold->spvec_in_use) { + + armpl_status_t info = armpl_spvec_destroy(hold->spvec); + assert(info == ARMPL_STATUS_SUCCESS); + hold->spvec_in_use = 0; + } +#endif /* if defined(INLA_WITH_ARMPL) */ Free(hold); } return GMRFLib_SUCCESS; diff --git a/gmrflib/idxval.h b/gmrflib/idxval.h index c254524f8..0217ac5c3 100644 --- a/gmrflib/idxval.h +++ b/gmrflib/idxval.h @@ -54,6 +54,11 @@ __BEGIN_DECLS #include "GMRFLib/hashP.h" #include "GMRFLib/GMRFLibP.h" + +#if defined(INLA_WITH_ARMPL) +#include "armpl_sparse.h" +#endif /* if defined(INLA_WITH_ARMPL) */ + typedef struct { int n; int n_alloc; @@ 
-83,6 +88,7 @@ typedef enum { IDXVAL_SERIAL, IDXVAL_SERIAL_MKL, IDXVAL_SERIAL_MKL_ALT, + IDXVAL_SERIAL_ARMPL, /* set by GMRFLib_idxval_nsort_x_core() in ARMPL builds. NOTE(review): inserted mid-enum, so the implicit values of the IDXVAL_GROUP* constants shift by one — confirm nothing depends on the old numbering */ IDXVAL_GROUP, IDXVAL_GROUP_MKL, IDXVAL_GROUP_MKL_ALT @@ -104,6 +110,10 @@ typedef struct { double **g_val; double *val; void **g_mem; +#if defined(INLA_WITH_ARMPL) /* these two members exist only in ARMPL builds */ + int spvec_in_use; /* set to 1 after spvec is created and back to 0 after it is destroyed */ + armpl_spvec_t spvec; /* ARMPL sparse-vector handle built from idx/val */ +#endif /* if defined(INLA_WITH_ARMPL) */ GMRFLib_idxval_preference_tp preference; GMRFLib_dot_product_tp *dot_product_func; } GMRFLib_idxval_tp; diff --git a/inlaprog/src/inla-parse.c b/inlaprog/src/inla-parse.c index 70153c89f..2494d6532 100644 --- a/inlaprog/src/inla-parse.c +++ b/inlaprog/src/inla-parse.c @@ -380,9 +380,34 @@ int inla_parse_problem(inla_tp *mb, dictionary *ini, int sec, int make_dir) #endif #if defined(__SSSE3__) printf("\t\tCompiler macro defined [__SSSE3__]\n"); +#endif +#if defined(INLA_WITH_PARDISO) /* from here on: print one line for each INLA_WITH_* build option that is defined */ + printf("\t\tCompiled with -DINLA_WITH_PARDISO\n"); +#endif +#if defined(INLA_WITH_PARDISO_WORKAROUND) + printf("\t\tCompiled with -DINLA_WITH_PARDISO_WORKAROUND\n"); +#endif +#if defined(INLA_WITH_LIBR) + printf("\t\tCompiled with -DINLA_WITH_LIBR\n"); +#endif +#if defined(INLA_WITH_MUPARSER) + printf("\t\tCompiled with -DINLA_WITH_MUPARSER\n"); +#endif +#if defined(INLA_WITH_SIMD) + printf("\t\tCompiled with -DINLA_WITH_SIMD\n"); +#endif +#if defined(INLA_WITH_MKL) + printf("\t\tCompiled with -DINLA_WITH_MKL\n"); +#endif +#if defined(INLA_WITH_OPENBLAS) + printf("\t\tCompiled with -DINLA_WITH_OPENBLAS\n"); +#endif +#if defined(INLA_WITH_ARMPL) + printf("\t\tCompiled with -DINLA_WITH_ARMPL\n"); #endif } + openmp_strategy = Strdup(iniparser_getstring(ini, inla_string_join(secname, "OPENMP.STRATEGY"), Strdup("DEFAULT"))); if (mb->verbose) { printf("\t\topenmp.strategy=[%s]\n", openmp_strategy); diff --git a/inlaprog/src/inla.c b/inlaprog/src/inla.c index ea5dab075..2983dad33 100644 --- a/inlaprog/src/inla.c +++ b/inlaprog/src/inla.c @@ -5592,7 +5592,7 @@ int inla_INLA_preopt_experimental(inla_tp *mb) } } #endif -#if !defined(INLA_WITH_MKL) +#if !defined(INLA_WITH_MKL) 
&& !defined(INLA_WITH_ARMPL) /* the serial/group timing report is compiled only when neither MKL nor ARMPL is enabled */ // report timings double time_loop[5] = { 0.0, 0.0, 0.0, 0.0, 0.0 }; if (GMRFLib_internal_opt && GMRFLib_dot_product_optim_report) { @@ -5658,7 +5658,7 @@ int inla_INLA_preopt_experimental(inla_tp *mb) printf("\t group [%4.1f] group.mkl [%4.1f] group.mkl.alt [%4.1f]\n", 100 * time_loop[10], 100 * time_loop[11], 100 * time_loop[12]); #endif -#if !defined(INLA_WITH_MKL) +#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL) /* same guard as the time_loop[5] declaration, so time_loop[0..4] is printed only where it is declared */ printf("\tOptimizing dot-products.... serial[%.3f] group[%.3f]\n", time_loop[0], time_loop[1]); printf("\t ==> optimal.mix.strategy[%.3f]\n", time_loop[2]); printf("\t serial[%4.1f] group[%4.1f]\n", 100 * time_loop[3], 100 * time_loop[4]); @@ -6909,7 +6909,7 @@ int main(int argc, char **argv) } } -#if !defined(INLA_WITH_MKL) +#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL) /* GMRFLib_dot_product_optim_report is allocated only when neither MKL nor ARMPL is enabled */ // I need to set it here as it depends on MAX_THREADS GMRFLib_dot_product_optim_report = Calloc(GMRFLib_CACHE_LEN(), double *); for (i = 0; i < GMRFLib_CACHE_LEN(); i++) {