Skip to content

Commit

Permalink
Fixed memset
Browse files: browse the repository at this point in the history
  • Loading branch information
scemama committed Dec 13, 2024
1 parent 11f89e1 commit 243e937
Showing 1 changed file with 8 additions and 77 deletions.
85 changes: 8 additions & 77 deletions org/qmckl_jastrow_champ.org
Original file line number Diff line number Diff line change
Expand Up @@ -11503,8 +11503,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp parallel for
#endif
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
bool touched = false;
double* const restrict factor_een_gl_0nw = &(factor_een_gl[elec_num*4*nw]);
memset(factor_een_gl_0nw, 0, elec_num*4*sizeof(double));
for (size_t n = 0; n < (size_t) dim_c_vector; ++n) {
const size_t l = lkpm_combined_index[n];
const size_t k = lkpm_combined_index[n+ dim_c_vector];
Expand Down Expand Up @@ -11561,11 +11561,10 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,

double tmp3[elec_num];

if (touched) {
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * (
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
Expand All @@ -11576,12 +11575,12 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
tmp3[j] =
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
}
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * (
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
Expand All @@ -11592,12 +11591,12 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
tmp3[j] = tmp3[j] +
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
}
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * (
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
Expand All @@ -11608,12 +11607,12 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
tmp3[j] = tmp3[j] +
dtmp_c_2amknw [j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw [j];
}
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * (
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
Expand All @@ -11622,76 +11621,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
tmp3[j]*2.0);
}

} else {

touched = true;

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_0nw[j] = cn * (
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j]);

tmp3[j] =
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_1nw[j] = cn * (
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);

tmp3[j] = tmp3[j] +
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {

factor_een_gl_2nw[j] = cn * (
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);

tmp3[j] = tmp3[j] +
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
}

#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_3nw[j] = cn * (
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
tmp3[j]*2.0);
}

}
}
}
if (!touched) {
memset(factor_een_gl_0nw, 0, elec_num*4*sizeof(double));
}
}
return info;
}
Expand Down

0 comments on commit 243e937

Please sign in to comment.