From e9cfaedadafa547ad46e70a85c41e4bae67de0f2 Mon Sep 17 00:00:00 2001
From: Kota Ido <13773226+k-ido@users.noreply.github.com>
Date: Wed, 1 May 2024 10:02:59 +0900
Subject: [PATCH 1/3] Update README.md

update reference
---
 README.md | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 3ae5fa7d..b0471fc5 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ The distribution of the program package and the source codes for HPhi follow GNU
 
 [“Quantum lattice model solver HΦ”, M. Kawamura, K. Yoshimi, T. Misawa, Y. Yamaji, S. Todo, and N. Kawashima, Computer Physics Communications 217, 180 (2017).](https://doi.org/10.1016/j.cpc.2017.04.006)
 
-[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, arXiv:2307.13222.](https://arxiv.org/abs/2307.13222)
+[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, Comput. Phys. Commun. 298, 109093 (2024).](https://doi.org/10.1016/j.cpc.2024.109093)
 
 Bibtex:
 
@@ -47,13 +47,16 @@ url = {https://www.sciencedirect.com/science/article/pii/S0010465517301200},
 author = {Mitsuaki Kawamura and Kazuyoshi Yoshimi and Takahiro Misawa and Youhei Yamaji and Synge Todo and Naoki Kawashima}
 }
 
-@misc{ido2023update,
-      title={Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
-      author={Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
-      year={2023},
-      eprint={2307.13222},
-      archivePrefix={arXiv},
-      primaryClass={cond-mat.str-el}
+@article{ido2024update,
+      title = {Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
+      author = {Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
+      journal = {Computer Physics Communications},
+      volume = {298},
+      pages = {109093},
+      year = {2024},
+      issn = {0010-4655},
+      doi = {https://doi.org/10.1016/j.cpc.2024.109093},
+      url = {https://www.sciencedirect.com/science/article/pii/S001046552400016X}  
 }
 
 

From fd6edfa06a8e85e2c741899842efc31040ab43b9 Mon Sep 17 00:00:00 2001
From: Mitsuaki Kawamura <kawamitsuaki@gmail.com>
Date: Fri, 29 Nov 2024 01:04:22 +0900
Subject: [PATCH 2/3] For excitation vector and correlation function of N/spin
 conserved case, Hermitian counterpart should not be computed because it
 causes out-of-memory.

---
 src/mltplyMPIHubbard.c     | 18 +++++++++---------
 src/mltplyMPIHubbardCore.c | 15 +++++++++++----
 src/mltplyMPISpin.c        | 16 ++++++++--------
 3 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c
index 80a556a9..7bea0200 100644
--- a/src/mltplyMPIHubbard.c
+++ b/src/mltplyMPIHubbard.c
@@ -143,7 +143,7 @@ double complex child_CisAjt_MPIdouble(
   long unsigned int *list_2_2_target//!<[in]
 ) {
 #ifdef MPI
-  int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
+  int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
   unsigned long int idim_max_buf, j, ioff;
   MPI_Status statusMPI;
   double complex trans, dmv;
@@ -164,9 +164,7 @@ double complex child_CisAjt_MPIdouble(
   }/*if (state1 == 0 && state2 == mask2)*/
   else if (state1 == mask1 && state2 == 0) {
     trans = -(double) Fsgn * conj(tmp_trans);
-    if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
-      trans = 0;
-    }
+    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
   }/*if (state1 == mask1 && state2 == 0)*/
   else return 0;
 
@@ -184,6 +182,8 @@ double complex child_CisAjt_MPIdouble(
                       v1buf,          idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
                       MPI_COMM_WORLD, &statusMPI);
   if (ierr != 0) exitMPI(-1);
+
+  if (only_send == 1)return 0;
   
   if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC) {
 #pragma omp parallel for default(none) private(j, dmv, ioff) \
@@ -363,7 +363,7 @@ double complex child_general_hopp_MPIdouble(
   double complex *tmp_v1//!<[in] v0 = H v1
 ) {
 #ifdef MPI
-  int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
+  int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
   unsigned long int idim_max_buf, j, ioff;
   MPI_Status statusMPI;
   double complex trans, dmv, dam_pr;
@@ -385,7 +385,7 @@ double complex child_general_hopp_MPIdouble(
   }
   else if (state1 == mask1 && state2 == 0) {
     trans = -(double) Fsgn * conj(tmp_trans);
-    if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) trans = 0;
+    if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) only_send = 1;
   }
   else return 0;
 
@@ -402,6 +402,8 @@ double complex child_general_hopp_MPIdouble(
                       MPI_COMM_WORLD, &statusMPI);
   if (ierr != 0) exitMPI(-1);
 
+  if (only_send == 1)return 0;
+
   dam_pr = 0.0;
 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff) \
   firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0)
@@ -506,9 +508,7 @@ double complex child_general_hopp_MPIsingle(
   else if (state2 == 0) {
     state1check = mask1;
     trans = -(double) Fsgn * conj(tmp_trans);
-    if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
-      trans = 0;
-    }
+    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) return 0;
   }
   else return 0;
 
diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c
index d45e8a93..41ea3a7e 100644
--- a/src/mltplyMPIHubbardCore.c
+++ b/src/mltplyMPIHubbardCore.c
@@ -922,7 +922,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
   double complex dam_pr = 0.0;
   unsigned long int i_max = X->Check.idim_max;
   unsigned long int idim_max_buf;
-  int iCheck, ierr, Fsgn;
+  int iCheck, ierr, Fsgn, only_send = 0;
   unsigned long int isite1, isite2, isite3, isite4;
   unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
   unsigned long int j, Adiff, Bdiff;
@@ -959,7 +959,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
       tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
       tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
       iFlgHermite = TRUE;
-      if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;     
+      if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
     }/*if (iCheck == TRUE)*/
     else return 0.0;
   }/*if (iCheck == FALSE)*/
@@ -1019,6 +1019,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
                         v1buf,       idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
                         MPI_COMM_WORLD, &statusMPI);
     if (ierr != 0) exitMPI(-1);
+
+    if (only_send == 1) return 0;
+
     if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
      && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite)
     {
@@ -1135,7 +1138,7 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
   double complex dam_pr = 0.0;
   unsigned long int i_max = X->Check.idim_max;
   unsigned long int idim_max_buf, ioff;
-  int iCheck, ierr, Fsgn;
+  int iCheck, ierr, Fsgn, only_send = 0;
   unsigned long int isite1, isite2, isite3;
   unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
   unsigned long int j, Asum, Adiff;
@@ -1169,13 +1172,15 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
       Asum = tmp_isite3 + tmp_isite4;
       if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
       else Adiff = tmp_isite3 - tmp_isite4 * 2;
-      if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
+      if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
       //printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff);
     }/*if (iCheck == TRUE)*/
     else return 0.0;   
   }/*if (iCheck == FALSE)*/
 
   if (myrank == origin) {// only k is in PE
+
+    if (only_send == 1) return 0;
     //for hermite
 #pragma omp parallel default(none) reduction(+:dam_pr) \
 firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1)
@@ -1207,6 +1212,8 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp
                         MPI_COMM_WORLD, &statusMPI);
     if (ierr != 0) exitMPI(-1);
 
+    if (only_send == 1) return 0;
+
 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \
 firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \
 shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2)
diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c
index 42fa1914..f7d43179 100644
--- a/src/mltplyMPISpin.c
+++ b/src/mltplyMPISpin.c
@@ -71,7 +71,7 @@ double complex child_general_int_spin_MPIdouble(
   double complex *tmp_v1//!<[in] Vector to be producted
 ) {
 #ifdef MPI
-  int mask1, mask2, state1, state2, ierr, origin;
+  int mask1, mask2, state1, state2, ierr, origin, only_send = 0;
   unsigned long int idim_max_buf, j, ioff;
   MPI_Status statusMPI;
   double complex Jint, dmv, dam_pr;
@@ -88,9 +88,7 @@ double complex child_general_int_spin_MPIdouble(
   }
   else if (state1 == org_ispin1 && state2 == org_ispin3) {
     Jint = conj(tmp_J);
-    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
-      Jint = 0;
-    }
+    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
   }
   else return 0;
 
@@ -104,6 +102,8 @@ double complex child_general_int_spin_MPIdouble(
                        v1buf,      idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI);
   if (ierr != 0) exitMPI(-1);
 
+  if (only_send == 1) return 0;
+
   dam_pr = 0.0;
   if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \
@@ -232,7 +232,7 @@ double complex child_general_int_spin_MPIsingle(
   double complex *tmp_v1//!<[in] Vector to be producted
 ) {
 #ifdef MPI
-  int mask2, state2, ierr, origin;
+  int mask2, state2, ierr, origin, only_send = 0;
   unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check;
   MPI_Status statusMPI;
   double complex Jint, dmv, dam_pr;
@@ -250,9 +250,7 @@ double complex child_general_int_spin_MPIsingle(
   else if (state2 == org_ispin3) {
     state1check = (unsigned long int) org_ispin1;
     Jint = conj(tmp_J);
-    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
-      Jint = 0;
-    }
+    if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
   }
   else return 0;
 
@@ -268,6 +266,8 @@ double complex child_general_int_spin_MPIsingle(
                       v1buf,       idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
                       MPI_COMM_WORLD, &statusMPI);
   if (ierr != 0) exitMPI(-1);
+
+  if (only_send == 1) return 0;
   /*
   Index in the intra PE
   */

From 522fa44c15036cbd67a8cbdf8f343e11c307b2c1 Mon Sep 17 00:00:00 2001
From: Mitsuaki Kawamura <kawamitsuaki@gmail.com>
Date: Fri, 29 Nov 2024 02:51:51 +0900
Subject: [PATCH 3/3] For computing excited state, Hermitian counterpart causes
 out-of-memory

---
 src/mltplyMPIHubbardCore.c  | 2 ++
 test/lanczos_spin_kagome.sh | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c
index 41ea3a7e..84c1c129 100644
--- a/src/mltplyMPIHubbardCore.c
+++ b/src/mltplyMPIHubbardCore.c
@@ -1600,6 +1600,7 @@ double complex child_Cis_MPI(
 
   if (state2 == mask2) {
     trans = 0;
+    return 0;
   }
   else if (state2 == 0) {
     trans = (double)Fsgn * tmp_trans;
@@ -1680,6 +1681,7 @@ double complex child_Ajt_MPI(
 
   if (state2 == 0) {
     trans = 0;
+    return 0;
   }
   else if (state2 == mask2) {
     trans = (double)Fsgn * tmp_trans;
diff --git a/test/lanczos_spin_kagome.sh b/test/lanczos_spin_kagome.sh
index 5ad88348..8c162fc4 100755
--- a/test/lanczos_spin_kagome.sh
+++ b/test/lanczos_spin_kagome.sh
@@ -1372,7 +1372,7 @@ cat > reference.dat <<EOF
 EOF
 paste output/zvo_cisajscktalt.dat reference.dat > paste3.dat
 diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($9-$19)*($9-$19)+($10-$20)*($10-$20))} 
-END{printf "%8.6f", diff}' paste3.dat`
+END{printf "%7.5f", diff}' paste3.dat`
 
-test "${diff}" = "0.000000"
+test "${diff}" = "0.00000"
 exit $?