From 7471dd152bfcc8edd3bcb0d809b74e0fbbf1d4a0 Mon Sep 17 00:00:00 2001 From: Tomas Racek Date: Tue, 14 Jun 2016 16:33:50 +0200 Subject: [PATCH] Make gnu version to use reference LAPACK This version should be faster and less prone to errors. --- src/Makefile | 2 +- src/eem.c | 156 ++++++++++------------------------------------- src/parameters.c | 97 +++++++++++++++-------------- 3 files changed, 83 insertions(+), 172 deletions(-) diff --git a/src/Makefile b/src/Makefile index b0251f2..a0a513b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -30,7 +30,7 @@ neemp-gnu: EXTRA_DEFINE= neemp-gnu: CC=gcc neemp-gnu: ICC_DISABLE_WARNS= neemp-gnu: CFLAGS=-Wall -Wextra -std=c99 -pedantic -O3 -march=native -g -gdwarf-3 -fopenmp -neemp-gnu: libraries=-lm -lz -fopenmp -lgfortran +neemp-gnu: libraries=-lm -lz -fopenmp -lgfortran -llapack neemp-gnu: $(objects) neemp neemp: $(objects) diff --git a/src/eem.c b/src/eem.c index 0671a59..cb9a2d8 100644 --- a/src/eem.c +++ b/src/eem.c @@ -13,6 +13,8 @@ #ifdef USE_MKL #include +#else +extern void sspsvx_(char *fact, char *uplo, int *n, int *nrhs, const float *ap, float *afp, int *ipiv, const float *b, int *ldb, float *x, int *ldx, float *rcond, float *ferr, float *berr, float *work, int *iwork, int *info); #endif /* USE_MKL */ #include "eem.h" @@ -24,12 +26,8 @@ extern const struct training_set ts; extern const struct settings s; -#ifdef USE_MKL + static void fill_EEM_matrix_packed(float * const A, const struct molecule * const m, const struct kappa_data * const kd); -#else -static void fill_EEM_matrix_full(float * const A, const struct molecule * const m, const struct kappa_data * const kd); -static int GEM_solver(float * const A, float * const b, long int n); -#endif /* USE_MKL */ #ifdef NOT_USED static void print_matrix_packed(const float * const A, long int n); @@ -69,7 +67,6 @@ static void print_matrix_full(const float * const A, int n) { #endif /* NOT_USED */ -#ifdef USE_MKL /* Use packed storage scheme to fill EEM matrix */ static void fill_EEM_matrix_packed(float * const A, const struct molecule * const m, const struct kappa_data * const kd) { @@ -101,95 +98,6 @@ static void fill_EEM_matrix_packed(float * const A, const struct molecule * cons A[U_IDX(n, n)] = 0.0f; #undef U_IDX } -#else -/* Fill the whole standard EEM matrix */ -static void fill_EEM_matrix_full(float * const A, const struct molecule * const m, const struct kappa_data * const kd) { - - assert(A != NULL); - assert(m != NULL); - assert(kd != NULL); - - const long int n = m->atoms_count; - - #define IDX(x, y) (x * (n + 1) + y) - /* Fill the n x n block */ - for(long int i = 0; i < n; i++) { - for(long int j = 0; j < n; j++) - if(i == j) - A[IDX(i, i)] = kd->parameters_beta[get_atom_type_idx(&m->atoms[i])]; - else { - if(s.mode == MODE_PARAMS) - A[IDX(i, j)] = (float) (kd->kappa * m->atoms[i].rdists[j]); - else - A[IDX(i, j)] = (float) (kd->kappa * rdist(&m->atoms[i], &m->atoms[j])); - } - } - - /* Fill last column */ - for(int i = 0; i < n; i++) - A[IDX(i, n)] = 1.0f; - - /* Fill last row */ - for(int i = 0; i < n; i++) - A[IDX(n, i)] = 1.0f; - - /* Set the bottom right element to zero */ - A[IDX(n, n)] = 0.0f; - #undef IDX -} - -/* Solve the system of linear eqns using GEM w/ partial pivoting */ -static int GEM_solver(float * const A, float * const b, long int n) { - - assert(A != NULL); - assert(b != NULL); - - #define IDX(x, y) (x * n + y) - /* GEM with partial pivoting */ - for(long int i = 0; i < n; i++) { - /* Find the largest pivot element */ - long int best_row = i; - for(long int j = i + 1; j < n; j++) - if(fabsf(A[IDX(best_row, i)]) < fabsf(A[IDX(j, i)])) - best_row = j; - - /* Swap rows i and best_row */ - for(long int j = 0; j < n; j++) { - float tmp = A[IDX(best_row, j)]; - A[IDX(best_row, j)] = A[IDX(i, j)]; - A[IDX(i, j)] = tmp; - } - - /* Swap elements in vector b in the same way */ - float tmp = b[best_row]; - b[best_row] = b[i]; - b[i] = tmp; - - /* Is matrix singular? */ - if(fabsf(A[IDX(i, i)]) < EPS) - return i + 1; - - /* Actual elimination */ - for(long int j = i + 1; j < n; j++) { - const float mult = A[IDX(j, i)] / A[IDX(i, i)]; - for(long int k = i + 1; k < n; k++) - A[IDX(j, k)] -= mult * A[IDX(i, k)]; - - b[j] -= mult * b[i]; - } - } - - /* Backward substitution */ - for(long int i = n - 1; i >= 0; i--) { - b[i] /= A[IDX(i, i)]; - for(long int j = i - 1; j >= 0; j--) - b[j] -= A[IDX(j, i)] * b[i]; - } - #undef IDX - - return 0; -} -#endif /* USE_MKL */ /* Calculate charges for a particular kappa_data structure */ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) { @@ -208,28 +116,29 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) { nt /= s.de_threads; if (s.params_method == PARAMS_GM) nt /= s.gm_threads; - int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt; + + int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt; #pragma omp parallel for num_threads(nthreads) for(int i = 0; i < ts.molecules_count; i++) { #define MOLECULE ts.molecules[i] - const long int n = MOLECULE.atoms_count; + const int n = MOLECULE.atoms_count; #ifdef USE_MKL - float *A = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 32); - float *b = (float *) mkl_malloc((n + 1) * sizeof(float), 32); + float *A = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 64); + float *b = (float *) mkl_malloc((n + 1) * sizeof(float), 64); + float *Ap = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 64); + float *x = (float *) mkl_malloc((n + 1) * sizeof(float), 64); #else - float *A = (float *) malloc((n + 1) * (n + 1) * sizeof(float)); + float *A = (float *) malloc((n + 1) * (n + 2) / 2 * sizeof(float)); float *b = (float *) malloc((n + 1) * sizeof(float)); + float *Ap = (float *) malloc(((n + 1) * (n + 2)) / 2 * sizeof(float)); + float *x = (float *) malloc((n + 1) * sizeof(float)); #endif /* USE_MKL */ - if(!A || !b) + if(!A || !b || !Ap || !x) EXIT_ERROR(MEM_ERROR, "%s", "Cannot allocate memory for EEM system.\n"); - #ifdef USE_MKL fill_EEM_matrix_packed(A, &ts.molecules[i], kd); - #else - fill_EEM_matrix_full(A, &ts.molecules[i], kd); - #endif /* USE_MKL */ /* Fill vector b */ for(int j = 0; j < n; j++) @@ -238,39 +147,36 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) { b[n] = MOLECULE.sum_of_charges; /* Solve EEM system */ - #ifdef USE_MKL int ipiv[n + 1]; - - /* int LAPACKE_sspsvx(int matrix_layout, char fact, char uplo, int n, int nrhs, const float *ap, float *afp, - * int *ipiv, const float *b, int ldb, float *x, int ldx, float *rcond, float *ferr, float *berr ); */ - - float *Ap = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 32); - float *x = (float *) mkl_malloc((n + 1) * sizeof(float), 32); float rcond; float berr, ferr; + char fact = 'N'; + char uplo = 'U'; + int nn = n + 1; + int nrhs = 1; + int ldb = n + 1; + int ldx = n + 1; + + #ifdef USE_MKL + LAPACKE_sspsvx(LAPACK_COL_MAJOR, fact, uplo, nn, nrhs, A, Ap, ipiv, b, ldb, x, ldx, &rcond, &ferr, &berr); + #else + int info; + int iwork[n + 1]; + float work[3 * (n + 1)]; - LAPACKE_sspsvx(LAPACK_COL_MAJOR, 'N', 'U', n + 1, 1, A, Ap, ipiv, b, n + 1, x, n + 1, &rcond, &ferr, &berr); + sspsvx_(&fact, &uplo, &nn, &nrhs, A, Ap, ipiv, b, &ldb, x, &ldx, &rcond, &ferr, &berr, work, iwork, &info); + #endif /* USE_MKL */ kd->per_molecule_stats[i].cond = 1 / rcond; - if(s.mode == MODE_CHARGES && rcond < WARN_MIN_RCOND) + if(s.mode == MODE_CHARGES && (1 / rcond) > WARN_MAX_COND) fprintf(stderr, "Ill-conditioned EEM system for molecule %s. Charges might be inaccurate.\n", MOLECULE.name); /* Store computed charges */ for(int j = 0; j < n; j++) kd->charges[starts[i] + j] = x[j]; - #else - - GEM_solver(A, b, n + 1); - - /* Store computed charges */ - for(int j = 0; j < n; j++) - kd->charges[starts[i] + j] = b[j]; - - #endif /* USE_MKL */ - #undef MOLECULE /* Clean things up */ @@ -282,6 +188,8 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) { #else free(A); free(b); + free(Ap); + free(x); #endif /* USE_MKL */ } } diff --git a/src/parameters.c b/src/parameters.c index 05999eb..7fb7281 100644 --- a/src/parameters.c +++ b/src/parameters.c @@ -11,6 +11,8 @@ #ifdef USE_MKL #include +#else +extern void dgels_(const char* trans, const int *m, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, double *work, const int *lwork, int *info); #endif /* USE_MKL */ #include "neemp.h" @@ -28,7 +30,6 @@ static inline int is_molecule_enabled(const struct subset * const ss, int mol_id return b_get(&ss->molecules, mol_idx); } -#ifdef USE_MKL /* Calculate parameters for give kappa_data structure */ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd) { @@ -38,10 +39,15 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd for(int i = 0; i < ts.atom_types_count; i++) { #define AT ts.atom_types[i] - double *A = (double *) mkl_malloc(2 * AT.atoms_count * sizeof(double), 32); + #ifdef USE_MKL + double *A = (double *) mkl_malloc(2 * AT.atoms_count * sizeof(double), 64); /* b has to have at least 2 elements since b[1] is used to store beta parameter. * If atoms_count is 1 (which is unlikely but possible) we would probably crash... */ - double *b = (double *) mkl_malloc((1 + AT.atoms_count) * sizeof(double), 32); + double *b = (double *) mkl_malloc((1 + AT.atoms_count) * sizeof(double), 64); + #else + double *A = (double *) malloc(2 * AT.atoms_count * sizeof(double)); + double *b = (double *) malloc((1 + AT.atoms_count) * sizeof(double)); + #endif /* USE_MKL */ /* If some molecule is disabled, skip its atoms. * To avoid having empty (zero) rows, change the index so that all used atoms are @@ -52,8 +58,7 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd #define ATOM ts.molecules[AT.atoms_molecule_idx[j]].atoms[AT.atoms_atom_idx[j]] if(is_molecule_enabled(ss, AT.atoms_molecule_idx[j])) { - A[2 * (j - disabled_count)] = 1.0; - A[2 * (j - disabled_count) + 1] = ATOM.reference_charge; + A[j - disabled_count] = 1.0; b[j - disabled_count] = MOLECULE.electronegativity - kd->kappa * ATOM.y; } else disabled_count++; @@ -62,56 +67,54 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd #undef MOLECULE } - int info = LAPACKE_dgels(LAPACK_ROW_MAJOR, 'N', AT.atoms_count - disabled_count, 2, 1, A, 2, b, 1); - if(info) - EXIT_ERROR(RUN_ERROR, "%s", "The least squares method failed.\n"); - - kd->parameters_alpha[i] = (float) b[0]; - kd->parameters_beta[i] = (float) b[1]; - #undef AT + int border = AT.atoms_count - disabled_count; + disabled_count = 0; - mkl_free(A); - mkl_free(b); - } -} -#else -void calculate_parameters(struct subset * const ss, struct kappa_data * const kd) { - - assert(ss != NULL); - assert(kd != NULL); - - for(int i = 0; i < ts.atom_types_count; i++) { - #define AT ts.atom_types[i] - - double sum1 = 0.0; - double sum2 = 0.0; - double sum3 = 0.0; - double sum4 = 0.0; - - int disabled_count = 0; - for(int j = 0; j < AT.atoms_count; j++) { + for(int j = 0; j < AT.atoms_count;j++) { #define MOLECULE ts.molecules[AT.atoms_molecule_idx[j]] #define ATOM ts.molecules[AT.atoms_molecule_idx[j]].atoms[AT.atoms_atom_idx[j]] - if(!is_molecule_enabled(ss, AT.atoms_molecule_idx[j])) { + if(is_molecule_enabled(ss, AT.atoms_molecule_idx[j])) + A[border + j - disabled_count] = ATOM.reference_charge; + else disabled_count++; - continue; - } - - double x = MOLECULE.electronegativity - kd->kappa * ATOM.y; - - sum1 += ATOM.reference_charge * x; - sum2 += ATOM.reference_charge; - sum3 += x; - sum4 += ATOM.reference_charge * ATOM.reference_charge; - #undef ATOM #undef MOLECULE } - const int atoms_used = AT.atoms_count - disabled_count; - kd->parameters_beta[i] = (float) ((atoms_used * sum1 - sum2 * sum3) / (atoms_used * sum4 - sum2 * sum2)); - kd->parameters_alpha[i] = (float) ((sum3 - kd->parameters_beta[i] * sum2) / atoms_used); + char trans = 'N'; + int m = AT.atoms_count - disabled_count; + int n = 2; + int nrhs = 1; + int lda = m; + int ldb = m; + int info; + + #ifdef USE_MKL + info = LAPACKE_dgels(LAPACK_COL_MAJOR, trans, m, n, nrhs, A, lda, b, ldb); + #else + int lwork = -1; + double work_opt; + + /* Check for optimal value of lwork */ + dgels_(&trans, &m, &n, &nrhs, A, &lda, b, &ldb, &work_opt, &lwork, &info); + lwork = (int) work_opt; + double work[lwork]; + dgels_(&trans, &m, &n, &nrhs, A, &lda, b, &ldb, work, &lwork, &info); + #endif /* USE_MKL */ + + if(info) + EXIT_ERROR(RUN_ERROR, "%s", "The least squares method failed.\n"); + + kd->parameters_alpha[i] = (float) b[0]; + kd->parameters_beta[i] = (float) b[1]; #undef AT + + #ifdef USE_MKL + mkl_free(A); + mkl_free(b); + #else + free(A); + free(b); + #endif /* USE_MKL */ } } -#endif /* USE_MKL */