From 2402b064b2b07b4c440b154cc2ed5352fda62c80 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Dec 2024 17:46:06 +0100 Subject: [PATCH] Fixed HPC Jastrow --- org/qmckl_jastrow_champ.org | 353 +++++++++++++++++++----------------- 1 file changed, 182 insertions(+), 171 deletions(-) diff --git a/org/qmckl_jastrow_champ.org b/org/qmckl_jastrow_champ.org index b86344a5..6cd7531f 100644 --- a/org/qmckl_jastrow_champ.org +++ b/org/qmckl_jastrow_champ.org @@ -10436,20 +10436,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een(qmckl_context context) ctx->jastrow_champ.factor_een = factor_een; } - /* - rc = qmckl_compute_jastrow_champ_factor_een_naive(context, - ctx->electron.walker.num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow_champ.cord_num, - ctx->jastrow_champ.dim_c_vector, - ctx->jastrow_champ.c_vector_full, - ctx->jastrow_champ.lkpm_combined_index, - ctx->jastrow_champ.een_rescaled_e, - ctx->jastrow_champ.een_rescaled_n, - ctx->jastrow_champ.factor_een); - */ - rc = qmckl_compute_jastrow_champ_factor_een(context, ctx->electron.walker.num, ctx->electron.num, @@ -10522,44 +10508,16 @@ integer function qmckl_compute_jastrow_champ_factor_een_naive_f( & if (cord_num < 0) info = QMCKL_INVALID_ARG_5 if (info /= QMCKL_SUCCESS) return - -! do nw =1, walk_num -! factor_een(nw) = 0.0d0 -! do n = 1, dim_c_vector -! l = lkpm_combined_index(n, 1) -! k = lkpm_combined_index(n, 2) -! p = lkpm_combined_index(n, 3) -! m = lkpm_combined_index(n, 4) - -! do a = 1, nucl_num -! accu2 = 0.0d0 -! cn = c_vector_full(a, n) -! do j = 1, elec_num -! accu = 0.0d0 -! do i = 1, elec_num - -! accu = accu + een_rescaled_e(i,j,k,nw) * & -! een_rescaled_n(i,a,m,nw) -! end do -! accu2 = accu2 + accu * een_rescaled_n(j,a,m + l,nw) -! end do -! factor_een(nw) = factor_een(nw) + accu2 * cn -! end do -! end do -! end do - do nw =1, walk_num factor_een(nw) = 0.d0 do n = 1, dim_c_vector l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) - p = lkpm_combined_index(n, 3) m = lkpm_combined_index(n, 4) do a = 1, nucl_num accu2 = 0.0d0 cn = c_vector_full(a, n) - print *, a, l, k, p, cn do j = 1, elec_num accu = 0.0d0 do i = 1, j-1 @@ -10683,7 +10641,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( & double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) double precision , intent(out) :: factor_een(walk_num) - integer*8 :: i, a, j, l, k, p, m, n, nw + integer*8 :: i, a, j, l, k, m, n, nw double precision :: accu, accu2, cn info = QMCKL_SUCCESS @@ -10703,7 +10661,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( & do n = 1, dim_c_vector l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) - p = lkpm_combined_index(n, 3) m = lkpm_combined_index(n, 4) do a = 1, nucl_num @@ -10872,6 +10829,32 @@ double factor_een[walk_num]; rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num); assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12); + +{ + double factor_een_naive[walk_num]; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + rc = qmckl_compute_jastrow_champ_factor_een_naive(context, + ctx->electron.walker.num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow_champ.cord_num, + ctx->jastrow_champ.dim_c_vector, + ctx->jastrow_champ.c_vector_full, + ctx->jastrow_champ.lkpm_combined_index, + ctx->jastrow_champ.een_rescaled_e, + ctx->jastrow_champ.een_rescaled_n, + factor_een_naive); + + for (int64_t i = 0; i < walk_num; i++) { + if (fabs(factor_een[i] - factor_een_naive[i]) > 1e-12) { + printf("factor_een[%ld] = %e\n", i, factor_een[i]); + printf("factor_een_naive[%ld] = %e\n", i, factor_een_naive[i]); + } + assert(fabs(factor_een[i] - factor_een_naive[i]) < 1e-12); + } +} #+end_src *** Electron-electron-nucleus Jastrow $f_{een}$ derivative @@ -11030,22 +11013,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een_gl(qmckl_context context) ctx->jastrow_champ.factor_een_gl = factor_een_gl; } - /* - rc = qmckl_compute_jastrow_champ_factor_een_gl_naive (context, - ctx->electron.walker.num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow_champ.cord_num, - ctx->jastrow_champ.dim_c_vector, - ctx->jastrow_champ.c_vector_full, - ctx->jastrow_champ.lkpm_combined_index, - ctx->jastrow_champ.een_rescaled_e, - ctx->jastrow_champ.een_rescaled_n, - ctx->jastrow_champ.een_rescaled_e_gl, - ctx->jastrow_champ.een_rescaled_n_gl, - ctx->jastrow_champ.factor_een_gl); - ,*/ - rc = qmckl_compute_jastrow_champ_factor_een_gl(context, ctx->electron.walker.num, ctx->electron.num, @@ -11113,7 +11080,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( & double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num) double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num) - integer*8 :: i, a, j, l, k, p, m, n, nw + integer*8 :: i, a, j, l, k, m, n, nw double precision :: accu, accu2, cn double precision :: daccu(1:4), daccu2(1:4) @@ -11132,7 +11099,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( & do n = 1, dim_c_vector l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) - p = lkpm_combined_index(n, 3) m = lkpm_combined_index(n, 4) do a = 1, nucl_num @@ -11303,11 +11269,13 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( & if (cord_num < 0) info = QMCKL_INVALID_ARG_5 if (info /= QMCKL_SUCCESS) return - factor_een_gl = 0.0d0 - - if (cord_num == 0) return + if (cord_num == 0) then + factor_een_gl = 0.0d0 + return + end if do nw =1, walk_num + factor_een_gl(:,:,nw) = 0.0d0 do n = 1, dim_c_vector l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) @@ -11319,24 +11287,24 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( & do ii = 1, 4 do j = 1, elec_num - factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( & - tmp_c(j,a,m,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + & - (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & - (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & - tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m,nw) & + factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( & + tmp_c (j, a,m ,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + & + tmp_c (j, a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m ,nw) + & + dtmp_c(j,ii,a,m ,k,nw) * een_rescaled_n (j, a,m+l,nw) + & + dtmp_c(j,ii,a,m+l,k,nw) * een_rescaled_n (j, a,m ,nw) & ) * cn end do end do cn = cn + cn do j = 1, elec_num - factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( & - (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_gl(j,1,a,m+l,nw) + & - (dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_gl(j,2,a,m+l,nw) + & - (dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_gl(j,3,a,m+l,nw) + & - (dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_gl(j,1,a,m ,nw) + & - (dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_gl(j,2,a,m ,nw) + & - (dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_gl(j,3,a,m ,nw) & + factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( & + dtmp_c(j,1,a,m ,k,nw) * een_rescaled_n_gl(j,1,a,m+l,nw) + & + dtmp_c(j,2,a,m ,k,nw) * een_rescaled_n_gl(j,2,a,m+l,nw) + & + dtmp_c(j,3,a,m ,k,nw) * een_rescaled_n_gl(j,3,a,m+l,nw) + & + dtmp_c(j,1,a,m+l,k,nw) * een_rescaled_n_gl(j,1,a,m ,nw) + & + dtmp_c(j,2,a,m+l,k,nw) * een_rescaled_n_gl(j,2,a,m ,nw) + & + dtmp_c(j,3,a,m+l,k,nw) * een_rescaled_n_gl(j,3,a,m ,nw) & ) * cn end do end do @@ -11528,8 +11496,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, return QMCKL_SUCCESS; } - const size_t elec_num2 = elec_num << 1; - const size_t elec_num3 = elec_num * 3; + const size_t elec_num2 = elec_num + elec_num; + const size_t elec_num3 = elec_num2 + elec_num; #ifdef HAVE_OPENMP #pragma omp parallel for @@ -11544,34 +11512,35 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, const size_t en = elec_num*nucl_num; const size_t len = l*en; - const size_t len4 = len << 2; - const size_t cn = cord_num*nw; + const size_t len4 = len*4; const size_t c1 = cord_num+1; + const size_t cn = cord_num*nw; const size_t addr0 = en*(m+c1*(k+cn)); - const size_t addr1 = en*(m+cn); + const size_t addr1 = en*(m+c1*nw); const double* restrict tmp_c_mkn = &(tmp_c[addr0]); const double* restrict tmp_c_mlkn = tmp_c_mkn + len; const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]); const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len; - const double* restrict dtmp_c_mknw = &(dtmp_c[addr0 << 2]); + const double* restrict dtmp_c_mknw = &(dtmp_c[addr0*4]); const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4; - const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]); + const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1*4]); // ? const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4; + for (size_t a = 0; a < (size_t) nucl_num; a++) { double cn = c_vector_full[a+n*nucl_num]; if (cn == 0.0) continue; const size_t ishift = elec_num*a; - const size_t ishift4 = ishift << 2; + const size_t ishift4 = ishift*4; + const double* restrict tmp_c_amkn = tmp_c_mkn + ishift; const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift; - const double* restrict tmp_c_amkn = tmp_c_mkn + ishift; - const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; + const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift; - const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; + const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4; - const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; + const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4; const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; @@ -11597,53 +11566,59 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); + + factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * ( + dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j] ); + tmp3[j] = - dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j]; + dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j]; } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); + + factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * ( + dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]); + tmp3[j] = tmp3[j] + - dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j]; + dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j]; } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); + + factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * ( + dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]); + tmp3[j] = tmp3[j] + - dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; + dtmp_c_2amknw [j] * een_rescaled_n_gl_2amlnw[j] + + dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw [j]; } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + - dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + + factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * ( + dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] + tmp3[j]*2.0); } @@ -11655,39 +11630,45 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_0nw[j] = cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); + + factor_een_gl_0nw[j] = cn * ( + dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j]); + tmp3[j] = - dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j]; + dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j]; } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_1nw[j] = cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); + + factor_een_gl_1nw[j] = cn * ( + dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]); + tmp3[j] = tmp3[j] + - dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j]; + dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j]; } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_2nw[j] = cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); + + factor_een_gl_2nw[j] = cn * ( + dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]); + tmp3[j] = tmp3[j] + dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; @@ -11697,11 +11678,11 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_gl_3nw[j] = cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + - dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + + factor_een_gl_3nw[j] = cn * ( + dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] + tmp3[j]*2.0); } @@ -11797,25 +11778,8 @@ TODO printf("factor_een_gl_hpc\n"); qmckl_context_struct* const ctx = (qmckl_context_struct*) context; - assert (ctx != NULL); - - double factor_een_gl_doc[walk_num*4*elec_num]; - memset(&(factor_een_gl_doc[0]), 0, walk_num*4*elec_num*sizeof(double)); - rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context, - ctx->electron.walker.num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow_champ.cord_num, - ctx->jastrow_champ.dim_c_vector, - ctx->jastrow_champ.c_vector_full, - ctx->jastrow_champ.lkpm_combined_index, - ctx->jastrow_champ.tmp_c, - ctx->jastrow_champ.dtmp_c, - ctx->jastrow_champ.een_rescaled_n, - ctx->jastrow_champ.een_rescaled_n_gl, - factor_een_gl_doc); - assert(rc == QMCKL_SUCCESS); + assert (ctx != NULL); assert(walk_num == 2); assert(elec_num == 10); assert(nucl_num == 2); @@ -11828,21 +11792,68 @@ TODO assert(ctx->jastrow_champ.cord_num == cord_num); assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector); + double factor_een_gl_naive[walk_num*4*elec_num]; + memset(&(factor_een_gl_naive[0]), 0, sizeof(factor_een_gl_naive)); + + rc = qmckl_compute_jastrow_champ_factor_een_gl_naive(context, + ctx->electron.walker.num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow_champ.cord_num, + ctx->jastrow_champ.dim_c_vector, + ctx->jastrow_champ.c_vector_full, + ctx->jastrow_champ.lkpm_combined_index, + ctx->jastrow_champ.een_rescaled_e, + ctx->jastrow_champ.een_rescaled_n, + ctx->jastrow_champ.een_rescaled_e_gl, + ctx->jastrow_champ.een_rescaled_n_gl, + factor_een_gl_naive); + assert(rc == QMCKL_SUCCESS); + + double factor_een_gl_doc[walk_num*4*elec_num]; + memset(&(factor_een_gl_doc[0]), 0, sizeof(factor_een_gl_doc)); + + rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context, + ctx->electron.walker.num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow_champ.cord_num, + ctx->jastrow_champ.dim_c_vector, + ctx->jastrow_champ.c_vector_full, + ctx->jastrow_champ.lkpm_combined_index, + ctx->jastrow_champ.tmp_c, + ctx->jastrow_champ.dtmp_c, + ctx->jastrow_champ.een_rescaled_n, + ctx->jastrow_champ.een_rescaled_n_gl, + factor_een_gl_doc); + assert(rc == QMCKL_SUCCESS); + + for (int64_t i = 0; i < walk_num*4*elec_num; ++i) { + if (fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) > 1e-12) { + printf("i = %ld\n", i); + printf("factor_een_gl_naive = %20.15e\n", factor_een_gl_naive[i]); + printf("factor_een_gl_doc = %20.15e\n", factor_een_gl_doc[i]); + fflush(stdout); + } + assert(fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) < 1e-8); + } + double factor_een_gl_hpc[walk_num*4*elec_num]; - memset(&(factor_een_gl_hpc[0]), 0, walk_num*4*elec_num*sizeof(double)); - rc = qmckl_compute_jastrow_champ_factor_een_gl(context, - ctx->electron.walker.num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow_champ.cord_num, - ctx->jastrow_champ.dim_c_vector, - ctx->jastrow_champ.c_vector_full, - ctx->jastrow_champ.lkpm_combined_index, - ctx->jastrow_champ.tmp_c, - ctx->jastrow_champ.dtmp_c, - ctx->jastrow_champ.een_rescaled_n, - ctx->jastrow_champ.een_rescaled_n_gl, - factor_een_gl_hpc); + memset(&(factor_een_gl_hpc[0]), 0, sizeof(factor_een_gl_hpc)); + + rc = qmckl_compute_jastrow_champ_factor_een_gl_hpc(context, + ctx->electron.walker.num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow_champ.cord_num, + ctx->jastrow_champ.dim_c_vector, + ctx->jastrow_champ.c_vector_full, + ctx->jastrow_champ.lkpm_combined_index, + ctx->jastrow_champ.tmp_c, + ctx->jastrow_champ.dtmp_c, + ctx->jastrow_champ.een_rescaled_n, + ctx->jastrow_champ.een_rescaled_n_gl, + factor_een_gl_hpc); for (int64_t i = 0; i < walk_num*4*elec_num; ++i) { if (fabs(factor_een_gl_doc[i] - factor_een_gl_hpc[i]) > 1e-12) {