Skip to content

Commit

Permalink
Use ARMPL sparse-vector-vector inner product
Browse files (browse the repository at this point in the history)
  • Loading branch information
hrue committed Sep 27, 2024
1 parent 7af4d10 commit 0574de5
Showing 6 changed files with 78 additions and 6 deletions.
12 changes: 11 additions & 1 deletion gmrflib/dot.c
Original file line number Diff line number Diff line change
@@ -35,7 +35,7 @@
double GMRFLib_dot_product(GMRFLib_idxval_tp *__restrict ELM_, double *__restrict ARR_)
{
if (ELM_->dot_product_func) {
#if !defined(INLA_WITH_MKL)
#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL)
if (GMRFLib_dot_product_gain >= 0.0) {
_Pragma("omp atomic")
GMRFLib_dot_product_gain += ELM_->cpu_gain;
@@ -205,3 +205,13 @@ double GMRFLib_ddot_idx_mkl(int n, double *__restrict v, double *__restrict a, i
}

#endif /* if defined(INLA_WITH_MKL) */

#if defined(INLA_WITH_ARMPL)
/*
 * Sparse dot product through the Arm Performance Libraries (ARMPL): calls armpl_spdot_exec_d()
 * with the sparse-vector handle stored in ELM_->spvec and the dense array ARR_.
 *
 * ELM_ : index/value object; only ELM_->spvec is read here
 * ARR_ : dense array, handed unchanged to armpl_spdot_exec_d()
 *
 * Returns the value ARMPL stored in `res'. The returned status is only verified with assert();
 * in a build with NDEBUG defined a failing call is NOT detected and the current content of `res'
 * (initialised to 0.0) is returned.
 */
double GMRFLib_dot_product_serial_armpl(GMRFLib_idxval_tp *__restrict ELM_, double *__restrict ARR_)
{
	double res = 0.0;
	armpl_status_t info = armpl_spdot_exec_d(ELM_->spvec, ARR_, &res);

	assert(info == ARMPL_STATUS_SUCCESS);
	// assert() expands to nothing under NDEBUG; keep `info' referenced so that
	// release builds do not emit an unused-variable warning
	(void) info;

	return (res);
}
#endif
7 changes: 7 additions & 0 deletions gmrflib/dot.h
Original file line number Diff line number Diff line change
@@ -52,7 +52,9 @@
#endif

__BEGIN_DECLS

#include "GMRFLib/GMRFLibP.h"

double GMRFLib_ddot(int n, double *x, double *y);
double GMRFLib_ddot_idx(int n, double *v, double *a, int *idx);
double GMRFLib_ddot_idx_mkl(int n, double *v, double *a, int *idx);
@@ -114,5 +116,10 @@ void GMRFLib_chose_threshold_ddot(void);
#define GMRFLib_dot_product_INLINE_ADDTO(ans_, v_, a_) ans_ += GMRFLib_dot_product(v_, a_)
#endif

#if defined(INLA_WITH_ARMPL)
#include "armpl_sparse.h"
double GMRFLib_dot_product_serial_armpl(GMRFLib_idxval_tp *__restrict ELM_, double *__restrict ARR_);
#endif

__END_DECLS
#endif
24 changes: 22 additions & 2 deletions gmrflib/idxval.c
Original file line number Diff line number Diff line change
@@ -43,6 +43,10 @@
#include "GMRFLib/GMRFLibP.h"
#include "GMRFLib/hashP.h"

#if defined(INLA_WITH_ARMPL)
#include "armpl_sparse.h"
#endif

#define IDX_ALLOC_INITIAL 8
#define IDX_ALLOC_ADD 128
#define IDX_ALLOC_NDIV 4
@@ -553,14 +557,22 @@ int GMRFLib_idxval_nsort_x_core(GMRFLib_idxval_tp *h, double *x, int prepare, in
h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial_mkl;
h->cpu_gain = 0.0;
return GMRFLib_SUCCESS;
#else
#endif
#if defined(INLA_WITH_ARMPL)
armpl_status_t info = armpl_spvec_create_d(&(h->spvec), 0, h->idx[h->n - 1], h->n, h->idx, h->val, 0);
assert(info == ARMPL_STATUS_SUCCESS);
h->spvec_in_use = 1;
h->preference = IDXVAL_SERIAL_ARMPL;
h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial_armpl;
h->cpu_gain = 0.0;
return GMRFLib_SUCCESS;
#endif
if (!prepare || !GMRFLib_internal_opt) {
h->preference = IDXVAL_SERIAL_MKL;
h->dot_product_func = (GMRFLib_dot_product_tp *) GMRFLib_dot_product_serial;
h->cpu_gain = 0.0;
return GMRFLib_SUCCESS;
}
#endif

// an upper bound for the number of groups for memory allocation
int ng = 1;
@@ -1175,6 +1187,14 @@ int GMRFLib_idxval_free(GMRFLib_idxval_tp *hold)
if (hold->g_mem) {
Free(hold->g_mem);
}
#if defined(INLA_WITH_ARMPL)
if (hold->spvec_in_use) {

armpl_status_t info = armpl_spvec_destroy(hold->spvec);
assert(info == ARMPL_STATUS_SUCCESS);
hold->spvec_in_use = 0;
}
#endif
Free(hold);
}
return GMRFLib_SUCCESS;
10 changes: 10 additions & 0 deletions gmrflib/idxval.h
Original file line number Diff line number Diff line change
@@ -54,6 +54,11 @@
__BEGIN_DECLS
#include "GMRFLib/hashP.h"
#include "GMRFLib/GMRFLibP.h"

#if defined(INLA_WITH_ARMPL)
#include "armpl_sparse.h"
#endif

typedef struct {
int n;
int n_alloc;
@@ -83,6 +88,7 @@ typedef enum {
IDXVAL_SERIAL,
IDXVAL_SERIAL_MKL,
IDXVAL_SERIAL_MKL_ALT,
IDXVAL_SERIAL_ARMPL,
IDXVAL_GROUP,
IDXVAL_GROUP_MKL,
IDXVAL_GROUP_MKL_ALT
@@ -104,6 +110,10 @@ typedef struct {
double **g_val;
double *val;
void **g_mem;
#if defined(INLA_WITH_ARMPL)
int spvec_in_use;
armpl_spvec_t spvec;
#endif
GMRFLib_idxval_preference_tp preference;
GMRFLib_dot_product_tp *dot_product_func;
} GMRFLib_idxval_tp;
25 changes: 25 additions & 0 deletions inlaprog/src/inla-parse.c
Original file line number Diff line number Diff line change
@@ -380,9 +380,34 @@ int inla_parse_problem(inla_tp *mb, dictionary *ini, int sec, int make_dir)
#endif
#if defined(__SSSE3__)
printf("\t\tCompiler macro defined [__SSSE3__]\n");
#endif
#if defined(INLA_WITH_PARDISO)
printf("\t\tCompiled with -DINLA_WITH_PARDISO\n");
#endif
#if defined(INLA_WITH_PARDISO_WORKAROUND)
printf("\t\tCompiled with -DINLA_WITH_PARDISO_WORKAROUND\n");
#endif
#if defined(INLA_WITH_LIBR)
printf("\t\tCompiled with -DINLA_WITH_LIBR\n");
#endif
#if defined(INLA_WITH_MUPARSER)
printf("\t\tCompiled with -DINLA_WITH_MUPARSER\n");
#endif
#if defined(INLA_WITH_SIMD)
printf("\t\tCompiled with -DINLA_WITH_SIMD\n");
#endif
#if defined(INLA_WITH_MKL)
printf("\t\tCompiled with -DINLA_WITH_MKL\n");
#endif
#if defined(INLA_WITH_OPENBLAS)
printf("\t\tCompiled with -DINLA_WITH_OPENBLAS\n");
#endif
#if defined(INLA_WITH_ARMPL)
printf("\t\tCompiled with -DINLA_WITH_ARMPL\n");
#endif
}


openmp_strategy = Strdup(iniparser_getstring(ini, inla_string_join(secname, "OPENMP.STRATEGY"), Strdup("DEFAULT")));
if (mb->verbose) {
printf("\t\topenmp.strategy=[%s]\n", openmp_strategy);
6 changes: 3 additions & 3 deletions inlaprog/src/inla.c
Original file line number Diff line number Diff line change
@@ -5592,7 +5592,7 @@ int inla_INLA_preopt_experimental(inla_tp *mb)
}
}
#endif
#if !defined(INLA_WITH_MKL)
#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL)
// report timings
double time_loop[5] = { 0.0, 0.0, 0.0, 0.0, 0.0 };
if (GMRFLib_internal_opt && GMRFLib_dot_product_optim_report) {
@@ -5658,7 +5658,7 @@ int inla_INLA_preopt_experimental(inla_tp *mb)
printf("\t group [%4.1f] group.mkl [%4.1f] group.mkl.alt [%4.1f]\n",
100 * time_loop[10], 100 * time_loop[11], 100 * time_loop[12]);
#endif
#if !defined(INLA_WITH_MKL)
#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL)
printf("\tOptimizing dot-products.... serial[%.3f] group[%.3f]\n", time_loop[0], time_loop[1]);
printf("\t ==> optimal.mix.strategy[%.3f]\n", time_loop[2]);
printf("\t serial[%4.1f] group[%4.1f]\n", 100 * time_loop[3], 100 * time_loop[4]);
@@ -6909,7 +6909,7 @@ int main(int argc, char **argv)
}
}

#if !defined(INLA_WITH_MKL)
#if !defined(INLA_WITH_MKL) && !defined(INLA_WITH_ARMPL)
// I need to set it here as it depends on MAX_THREADS
GMRFLib_dot_product_optim_report = Calloc(GMRFLib_CACHE_LEN(), double *);
for (i = 0; i < GMRFLib_CACHE_LEN(); i++) {

0 comments on commit 0574de5

Please sign in to comment.