From e9cfaedadafa547ad46e70a85c41e4bae67de0f2 Mon Sep 17 00:00:00 2001 From: Kota Ido <13773226+k-ido@users.noreply.github.com> Date: Wed, 1 May 2024 10:02:59 +0900 Subject: [PATCH 1/3] Update README.md update reference --- README.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3ae5fa7d..b0471fc5 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The distribution of the program package and the source codes for HPhi follow GNU [“Quantum lattice model solver HΦ”, M. Kawamura, K. Yoshimi, T. Misawa, Y. Yamaji, S. Todo, and N. Kawashima, Computer Physics Communications 217, 180 (2017).](https://doi.org/10.1016/j.cpc.2017.04.006) -[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, arXiv:2307.13222.](https://arxiv.org/abs/2307.13222) +[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, Comput. Phys. Commun. 
298, 109093 (2024).](https://doi.org/10.1016/j.cpc.2024.109093) Bibtex: @@ -47,13 +47,16 @@ url = {https://www.sciencedirect.com/science/article/pii/S0010465517301200}, author = {Mitsuaki Kawamura and Kazuyoshi Yoshimi and Takahiro Misawa and Youhei Yamaji and Synge Todo and Naoki Kawashima} } -@misc{ido2023update, - title={Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3}, - author={Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa}, - year={2023}, - eprint={2307.13222}, - archivePrefix={arXiv}, - primaryClass={cond-mat.str-el} +@article{ido2024update, + title = {Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3}, + author = {Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa}, + journal = {Computer Physics Communications}, + volume = {298}, + pages = {109093}, + year = {2024}, + issn = {0010-4655}, + doi = {https://doi.org/10.1016/j.cpc.2024.109093}, + url = {https://www.sciencedirect.com/science/article/pii/S001046552400016X} } From fd6edfa06a8e85e2c741899842efc31040ab43b9 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Fri, 29 Nov 2024 01:04:22 +0900 Subject: [PATCH 2/3] For excitation vector and correlation function of N/spin conserved case, Hermite counterpart should not be computed because it causes out-of-memory.
--- src/mltplyMPIHubbard.c | 18 +++++++++--------- src/mltplyMPIHubbardCore.c | 15 +++++++++++---- src/mltplyMPISpin.c | 16 ++++++++-------- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 80a556a9..7bea0200 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -143,7 +143,7 @@ double complex child_CisAjt_MPIdouble( long unsigned int *list_2_2_target//!<[in] ) { #ifdef MPI - int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; + int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; double complex trans, dmv; @@ -164,9 +164,7 @@ double complex child_CisAjt_MPIdouble( }/*if (state1 == 0 && state2 == mask2)*/ else if (state1 == mask1 && state2 == 0) { trans = -(double) Fsgn * conj(tmp_trans); - if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) { - trans = 0; - } + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1; }/*if (state1 == mask1 && state2 == 0)*/ else return 0; @@ -184,6 +182,8 @@ double complex child_CisAjt_MPIdouble( v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + + if (only_send == 1)return 0; if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC) { #pragma omp parallel for default(none) private(j, dmv, ioff) \ @@ -363,7 +363,7 @@ double complex child_general_hopp_MPIdouble( double complex *tmp_v1//!<[in] v0 = H v1 ) { #ifdef MPI - int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; + int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; double complex trans, dmv, dam_pr; @@ -385,7 +385,7 @@ double complex child_general_hopp_MPIdouble( } else if (state1 == mask1 && state2 == 0) { trans = -(double) Fsgn * conj(tmp_trans); - if (X->Large.mode == M_CORR|| X->Large.mode == 
M_CALCSPEC) trans = 0; + if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) only_send = 1; } else return 0; @@ -402,6 +402,8 @@ double complex child_general_hopp_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + if (only_send == 1)return 0; + dam_pr = 0.0; #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) @@ -506,9 +508,7 @@ double complex child_general_hopp_MPIsingle( else if (state2 == 0) { state1check = mask1; trans = -(double) Fsgn * conj(tmp_trans); - if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) { - trans = 0; - } + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) return 0; } else return 0; diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index d45e8a93..41ea3a7e 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -922,7 +922,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI( double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; - int iCheck, ierr, Fsgn; + int iCheck, ierr, Fsgn, only_send = 0; unsigned long int isite1, isite2, isite3, isite4; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Adiff, Bdiff; @@ -959,7 +959,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI( tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3]; tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4]; iFlgHermite = TRUE; - if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0; + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1; }/*if (iCheck == TRUE)*/ else return 0.0; }/*if (iCheck == FALSE)*/ @@ -1019,6 +1019,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + + if (only_send == 
1) return 0; + if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite) { @@ -1135,7 +1138,7 @@ double complex child_CisAjtCkuAku_Hubbard_MPI( double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf, ioff; - int iCheck, ierr, Fsgn; + int iCheck, ierr, Fsgn, only_send = 0; unsigned long int isite1, isite2, isite3; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Asum, Adiff; @@ -1169,13 +1172,15 @@ double complex child_CisAjtCkuAku_Hubbard_MPI( Asum = tmp_isite3 + tmp_isite4; if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2; else Adiff = tmp_isite3 - tmp_isite4 * 2; - if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0; + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1; //printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff); }/*if (iCheck == TRUE)*/ else return 0.0; }/*if (iCheck == FALSE)*/ if (myrank == origin) {// only k is in PE + + if (only_send == 1) return 0; //for hermite #pragma omp parallel default(none) reduction(+:dam_pr) \ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1) @@ -1207,6 +1212,8 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + if (only_send == 1) return 0; + #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 42fa1914..f7d43179 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -71,7 +71,7 @@ double 
complex child_general_int_spin_MPIdouble( double complex *tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI - int mask1, mask2, state1, state2, ierr, origin; + int mask1, mask2, state1, state2, ierr, origin, only_send = 0; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; double complex Jint, dmv, dam_pr; @@ -88,9 +88,7 @@ double complex child_general_int_spin_MPIdouble( } else if (state1 == org_ispin1 && state2 == org_ispin3) { Jint = conj(tmp_J); - if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) { - Jint = 0; - } + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1; } else return 0; @@ -104,6 +102,8 @@ double complex child_general_int_spin_MPIdouble( v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + if (only_send == 1) return 0; + dam_pr = 0.0; if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ @@ -232,7 +232,7 @@ double complex child_general_int_spin_MPIsingle( double complex *tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI - int mask2, state2, ierr, origin; + int mask2, state2, ierr, origin, only_send = 0; unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check; MPI_Status statusMPI; double complex Jint, dmv, dam_pr; @@ -250,9 +250,7 @@ double complex child_general_int_spin_MPIsingle( else if (state2 == org_ispin3) { state1check = (unsigned long int) org_ispin1; Jint = conj(tmp_J); - if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) { - Jint = 0; - } + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1; } else return 0; @@ -268,6 +266,8 @@ double complex child_general_int_spin_MPIsingle( v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); + + if (only_send == 1) return 0; /* Index in the intra PE */ From 
522fa44c15036cbd67a8cbdf8f343e11c307b2c1 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Fri, 29 Nov 2024 02:51:51 +0900 Subject: [PATCH 3/3] For computing excited state, Hermite counterpart causes out-of-memory --- src/mltplyMPIHubbardCore.c | 2 ++ test/lanczos_spin_kagome.sh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 41ea3a7e..84c1c129 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -1600,6 +1600,7 @@ double complex child_Cis_MPI( if (state2 == mask2) { trans = 0; + return 0; } else if (state2 == 0) { trans = (double)Fsgn * tmp_trans; @@ -1680,6 +1681,7 @@ double complex child_Ajt_MPI( if (state2 == 0) { trans = 0; + return 0; } else if (state2 == mask2) { trans = (double)Fsgn * tmp_trans; diff --git a/test/lanczos_spin_kagome.sh b/test/lanczos_spin_kagome.sh index 5ad88348..8c162fc4 100755 --- a/test/lanczos_spin_kagome.sh +++ b/test/lanczos_spin_kagome.sh @@ -1372,7 +1372,7 @@ cat > reference.dat < paste3.dat diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($9-$19)*($9-$19)+($10-$20)*($10-$20))} -END{printf "%8.6f", diff}' paste3.dat` +END{printf "%7.5f", diff}' paste3.dat` -test "${diff}" = "0.000000" +test "${diff}" = "0.00000" exit $?