Skip to content

Commit

Permalink
Make gnu version to use reference LAPACK
Browse files Browse the repository at this point in the history
This version should be faster and less prone to errors.
  • Loading branch information
krab1k committed Jun 14, 2016
1 parent 73a0088 commit 7471dd1
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 172 deletions.
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ neemp-gnu: EXTRA_DEFINE=
neemp-gnu: CC=gcc
neemp-gnu: ICC_DISABLE_WARNS=
neemp-gnu: CFLAGS=-Wall -Wextra -std=c99 -pedantic -O3 -march=native -g -gdwarf-3 -fopenmp
neemp-gnu: libraries=-lm -lz -fopenmp -lgfortran
neemp-gnu: libraries=-lm -lz -fopenmp -lgfortran -llapack
neemp-gnu: $(objects) neemp

neemp: $(objects)
Expand Down
156 changes: 32 additions & 124 deletions src/eem.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

#ifdef USE_MKL
#include <mkl.h>
#else
extern void sspsvx_(char *fact, char *uplo, int *n, int *nrhs, const float *ap, float *afp, int *ipiv, const float *b, int *ldb, float *x, int *ldx, float *rcond, float *ferr, float *berr, float *work, int *iwork, int *info);
#endif /* USE_MKL */

#include "eem.h"
Expand All @@ -24,12 +26,8 @@
extern const struct training_set ts;
extern const struct settings s;

#ifdef USE_MKL

static void fill_EEM_matrix_packed(float * const A, const struct molecule * const m, const struct kappa_data * const kd);
#else
static void fill_EEM_matrix_full(float * const A, const struct molecule * const m, const struct kappa_data * const kd);
static int GEM_solver(float * const A, float * const b, long int n);
#endif /* USE_MKL */

#ifdef NOT_USED
static void print_matrix_packed(const float * const A, long int n);
Expand Down Expand Up @@ -69,7 +67,6 @@ static void print_matrix_full(const float * const A, int n) {

#endif /* NOT_USED */

#ifdef USE_MKL
/* Use packed storage scheme to fill EEM matrix */
static void fill_EEM_matrix_packed(float * const A, const struct molecule * const m, const struct kappa_data * const kd) {

Expand Down Expand Up @@ -101,95 +98,6 @@ static void fill_EEM_matrix_packed(float * const A, const struct molecule * cons
A[U_IDX(n, n)] = 0.0f;
#undef U_IDX
}
#else
/* Fill the whole standard EEM matrix */
static void fill_EEM_matrix_full(float * const A, const struct molecule * const m, const struct kappa_data * const kd) {

assert(A != NULL);
assert(m != NULL);
assert(kd != NULL);

const long int n = m->atoms_count;

#define IDX(x, y) (x * (n + 1) + y)
/* Fill the n x n block */
for(long int i = 0; i < n; i++) {
for(long int j = 0; j < n; j++)
if(i == j)
A[IDX(i, i)] = kd->parameters_beta[get_atom_type_idx(&m->atoms[i])];
else {
if(s.mode == MODE_PARAMS)
A[IDX(i, j)] = (float) (kd->kappa * m->atoms[i].rdists[j]);
else
A[IDX(i, j)] = (float) (kd->kappa * rdist(&m->atoms[i], &m->atoms[j]));
}
}

/* Fill last column */
for(int i = 0; i < n; i++)
A[IDX(i, n)] = 1.0f;

/* Fill last row */
for(int i = 0; i < n; i++)
A[IDX(n, i)] = 1.0f;

/* Set the bottom right element to zero */
A[IDX(n, n)] = 0.0f;
#undef IDX
}

/* Solve the system of linear eqns using GEM w/ partial pivoting */
static int GEM_solver(float * const A, float * const b, long int n) {

assert(A != NULL);
assert(b != NULL);

#define IDX(x, y) (x * n + y)
/* GEM with partial pivoting */
for(long int i = 0; i < n; i++) {
/* Find the largest pivot element */
long int best_row = i;
for(long int j = i + 1; j < n; j++)
if(fabsf(A[IDX(best_row, i)]) < fabsf(A[IDX(j, i)]))
best_row = j;

/* Swap rows i and best_row */
for(long int j = 0; j < n; j++) {
float tmp = A[IDX(best_row, j)];
A[IDX(best_row, j)] = A[IDX(i, j)];
A[IDX(i, j)] = tmp;
}

/* Swap elements in vector b in the same way */
float tmp = b[best_row];
b[best_row] = b[i];
b[i] = tmp;

/* Is matrix singular? */
if(fabsf(A[IDX(i, i)]) < EPS)
return i + 1;

/* Actual elimination */
for(long int j = i + 1; j < n; j++) {
const float mult = A[IDX(j, i)] / A[IDX(i, i)];
for(long int k = i + 1; k < n; k++)
A[IDX(j, k)] -= mult * A[IDX(i, k)];

b[j] -= mult * b[i];
}
}

/* Backward substitution */
for(long int i = n - 1; i >= 0; i--) {
b[i] /= A[IDX(i, i)];
for(long int j = i - 1; j >= 0; j--)
b[j] -= A[IDX(j, i)] * b[i];
}
#undef IDX

return 0;
}
#endif /* USE_MKL */

/* Calculate charges for a particular kappa_data structure */
void calculate_charges(struct subset * const ss, struct kappa_data * const kd) {
Expand All @@ -208,28 +116,29 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) {
nt /= s.de_threads;
if (s.params_method == PARAMS_GM)
nt /= s.gm_threads;
int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt;

int nthreads = ts.molecules_count < nt ? ts.molecules_count : nt;
#pragma omp parallel for num_threads(nthreads)
for(int i = 0; i < ts.molecules_count; i++) {
#define MOLECULE ts.molecules[i]
const long int n = MOLECULE.atoms_count;
const int n = MOLECULE.atoms_count;

#ifdef USE_MKL
float *A = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 32);
float *b = (float *) mkl_malloc((n + 1) * sizeof(float), 32);
float *A = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 64);
float *b = (float *) mkl_malloc((n + 1) * sizeof(float), 64);
float *Ap = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 64);
float *x = (float *) mkl_malloc((n + 1) * sizeof(float), 64);
#else
float *A = (float *) malloc((n + 1) * (n + 1) * sizeof(float));
float *A = (float *) malloc((n + 1) * (n + 2) / 2 * sizeof(float));
float *b = (float *) malloc((n + 1) * sizeof(float));
float *Ap = (float *) malloc(((n + 1) * (n + 2)) / 2 * sizeof(float));
float *x = (float *) malloc((n + 1) * sizeof(float));
#endif /* USE_MKL */

if(!A || !b)
if(!A || !b || !Ap || !x)
EXIT_ERROR(MEM_ERROR, "%s", "Cannot allocate memory for EEM system.\n");

#ifdef USE_MKL
fill_EEM_matrix_packed(A, &ts.molecules[i], kd);
#else
fill_EEM_matrix_full(A, &ts.molecules[i], kd);
#endif /* USE_MKL */

/* Fill vector b */
for(int j = 0; j < n; j++)
Expand All @@ -238,39 +147,36 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) {
b[n] = MOLECULE.sum_of_charges;

/* Solve EEM system */
#ifdef USE_MKL

int ipiv[n + 1];

/* int LAPACKE_sspsvx(int matrix_layout, char fact, char uplo, int n, int nrhs, const float *ap, float *afp,
* int *ipiv, const float *b, int ldb, float *x, int ldx, float *rcond, float *ferr, float *berr ); */

float *Ap = (float *) mkl_malloc(((n + 1) * (n + 2)) / 2 * sizeof(float), 32);
float *x = (float *) mkl_malloc((n + 1) * sizeof(float), 32);
float rcond;
float berr, ferr;
char fact = 'N';
char uplo = 'U';
int nn = n + 1;
int nrhs = 1;
int ldb = n + 1;
int ldx = n + 1;

#ifdef USE_MKL
LAPACKE_sspsvx(LAPACK_COL_MAJOR, fact, uplo, nn, nrhs, A, Ap, ipiv, b, ldb, x, ldx, &rcond, &ferr, &berr);
#else
int info;
int iwork[n + 1];
float work[3 * (n + 1)];

LAPACKE_sspsvx(LAPACK_COL_MAJOR, 'N', 'U', n + 1, 1, A, Ap, ipiv, b, n + 1, x, n + 1, &rcond, &ferr, &berr);
sspsvx_(&fact, &uplo, &nn, &nrhs, A, Ap, ipiv, b, &ldb, x, &ldx, &rcond, &ferr, &berr, work, iwork, &info);
#endif /* USE_MKL */

kd->per_molecule_stats[i].cond = 1 / rcond;

if(s.mode == MODE_CHARGES && rcond < WARN_MIN_RCOND)
if(s.mode == MODE_CHARGES && (1 / rcond) > WARN_MAX_COND)
fprintf(stderr, "Ill-conditioned EEM system for molecule %s. Charges might be inaccurate.\n", MOLECULE.name);

/* Store computed charges */
for(int j = 0; j < n; j++)
kd->charges[starts[i] + j] = x[j];

#else

GEM_solver(A, b, n + 1);

/* Store computed charges */
for(int j = 0; j < n; j++)
kd->charges[starts[i] + j] = b[j];

#endif /* USE_MKL */

#undef MOLECULE

/* Clean things up */
Expand All @@ -282,6 +188,8 @@ void calculate_charges(struct subset * const ss, struct kappa_data * const kd) {
#else
free(A);
free(b);
free(Ap);
free(x);
#endif /* USE_MKL */
}
}
97 changes: 50 additions & 47 deletions src/parameters.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#ifdef USE_MKL
#include <mkl.h>
#else
extern void dgels_(const char* trans, const int *m, const int *n, const int *nrhs, double *a, const int *lda, double *b, const int *ldb, double *work, const int *lwork, int *info);
#endif /* USE_MKL */

#include "neemp.h"
Expand All @@ -28,7 +30,6 @@ static inline int is_molecule_enabled(const struct subset * const ss, int mol_id
return b_get(&ss->molecules, mol_idx);
}

#ifdef USE_MKL
/* Calculate parameters for give kappa_data structure */
void calculate_parameters(struct subset * const ss, struct kappa_data * const kd) {

Expand All @@ -38,10 +39,15 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd
for(int i = 0; i < ts.atom_types_count; i++) {
#define AT ts.atom_types[i]

double *A = (double *) mkl_malloc(2 * AT.atoms_count * sizeof(double), 32);
#ifdef USE_MKL
double *A = (double *) mkl_malloc(2 * AT.atoms_count * sizeof(double), 64);
/* b has to have at least 2 elements since b[1] is used to store beta parameter.
* If atoms_count is 1 (which is unlikely but possible) we would probably crash... */
double *b = (double *) mkl_malloc((1 + AT.atoms_count) * sizeof(double), 32);
double *b = (double *) mkl_malloc((1 + AT.atoms_count) * sizeof(double), 64);
#else
double *A = (double *) malloc(2 * AT.atoms_count * sizeof(double));
double *b = (double *) malloc((1 + AT.atoms_count) * sizeof(double));
#endif /* USE_MKL */

/* If some molecule is disabled, skip its atoms.
* To avoid having empty (zero) rows, change the index so that all used atoms are
Expand All @@ -52,8 +58,7 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd
#define ATOM ts.molecules[AT.atoms_molecule_idx[j]].atoms[AT.atoms_atom_idx[j]]

if(is_molecule_enabled(ss, AT.atoms_molecule_idx[j])) {
A[2 * (j - disabled_count)] = 1.0;
A[2 * (j - disabled_count) + 1] = ATOM.reference_charge;
A[j - disabled_count] = 1.0;
b[j - disabled_count] = MOLECULE.electronegativity - kd->kappa * ATOM.y;
} else
disabled_count++;
Expand All @@ -62,56 +67,54 @@ void calculate_parameters(struct subset * const ss, struct kappa_data * const kd
#undef MOLECULE
}

int info = LAPACKE_dgels(LAPACK_ROW_MAJOR, 'N', AT.atoms_count - disabled_count, 2, 1, A, 2, b, 1);
if(info)
EXIT_ERROR(RUN_ERROR, "%s", "The least squares method failed.\n");

kd->parameters_alpha[i] = (float) b[0];
kd->parameters_beta[i] = (float) b[1];
#undef AT
int border = AT.atoms_count - disabled_count;
disabled_count = 0;

mkl_free(A);
mkl_free(b);
}
}
#else
void calculate_parameters(struct subset * const ss, struct kappa_data * const kd) {

assert(ss != NULL);
assert(kd != NULL);

for(int i = 0; i < ts.atom_types_count; i++) {
#define AT ts.atom_types[i]

double sum1 = 0.0;
double sum2 = 0.0;
double sum3 = 0.0;
double sum4 = 0.0;

int disabled_count = 0;
for(int j = 0; j < AT.atoms_count; j++) {
for(int j = 0; j < AT.atoms_count;j++) {
#define MOLECULE ts.molecules[AT.atoms_molecule_idx[j]]
#define ATOM ts.molecules[AT.atoms_molecule_idx[j]].atoms[AT.atoms_atom_idx[j]]
if(!is_molecule_enabled(ss, AT.atoms_molecule_idx[j])) {
if(is_molecule_enabled(ss, AT.atoms_molecule_idx[j]))
A[border + j - disabled_count] = ATOM.reference_charge;
else
disabled_count++;
continue;
}

double x = MOLECULE.electronegativity - kd->kappa * ATOM.y;

sum1 += ATOM.reference_charge * x;
sum2 += ATOM.reference_charge;
sum3 += x;
sum4 += ATOM.reference_charge * ATOM.reference_charge;

#undef ATOM
#undef MOLECULE
}

const int atoms_used = AT.atoms_count - disabled_count;
kd->parameters_beta[i] = (float) ((atoms_used * sum1 - sum2 * sum3) / (atoms_used * sum4 - sum2 * sum2));
kd->parameters_alpha[i] = (float) ((sum3 - kd->parameters_beta[i] * sum2) / atoms_used);
char trans = 'N';
int m = AT.atoms_count - disabled_count;
int n = 2;
int nrhs = 1;
int lda = m;
int ldb = m;
int info;

#ifdef USE_MKL
info = LAPACKE_dgels(LAPACK_COL_MAJOR, trans, m, n, nrhs, A, lda, b, ldb);
#else
int lwork = -1;
double work_opt;

/* Check for optimal value of lwork */
dgels_(&trans, &m, &n, &nrhs, A, &lda, b, &ldb, &work_opt, &lwork, &info);
lwork = (int) work_opt;
double work[lwork];
dgels_(&trans, &m, &n, &nrhs, A, &lda, b, &ldb, work, &lwork, &info);
#endif /* USE_MKL */

if(info)
EXIT_ERROR(RUN_ERROR, "%s", "The least squares method failed.\n");

kd->parameters_alpha[i] = (float) b[0];
kd->parameters_beta[i] = (float) b[1];
#undef AT

#ifdef USE_MKL
mkl_free(A);
mkl_free(b);
#else
free(A);
free(b);
#endif /* USE_MKL */
}
}
#endif /* USE_MKL */

0 comments on commit 7471dd1

Please sign in to comment.