Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug Fix: Out-of-memory in MPI+Canonical+Sz/N-changing excitation #190

Merged
merged 6 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ The distribution of the program package and the source codes for HPhi follow GNU

[“Quantum lattice model solver HΦ”, M. Kawamura, K. Yoshimi, T. Misawa, Y. Yamaji, S. Todo, and N. Kawashima, Computer Physics Communications 217, 180 (2017).](https://doi.org/10.1016/j.cpc.2017.04.006)

[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, arXiv:2307.13222.](https://arxiv.org/abs/2307.13222)
[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, Comput. Phys. Commun. 298, 109093 (2024).](https://doi.org/10.1016/j.cpc.2024.109093)

Bibtex:

Expand All @@ -47,13 +47,16 @@ url = {https://www.sciencedirect.com/science/article/pii/S0010465517301200},
author = {Mitsuaki Kawamura and Kazuyoshi Yoshimi and Takahiro Misawa and Youhei Yamaji and Synge Todo and Naoki Kawashima}
}

@misc{ido2023update,
title={Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
author={Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
year={2023},
eprint={2307.13222},
archivePrefix={arXiv},
primaryClass={cond-mat.str-el}
@article{ido2024update,
title = {Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
author = {Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
journal = {Computer Physics Communications},
volume = {298},
pages = {109093},
year = {2024},
issn = {0010-4655},
doi = {https://doi.org/10.1016/j.cpc.2024.109093},
url = {https://www.sciencedirect.com/science/article/pii/S001046552400016X}
}


Expand Down
18 changes: 9 additions & 9 deletions src/mltplyMPIHubbard.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ double complex child_CisAjt_MPIdouble(
long unsigned int *list_2_2_target//!<[in]
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex trans, dmv;
Expand All @@ -164,9 +164,7 @@ double complex child_CisAjt_MPIdouble(
}/*if (state1 == 0 && state2 == mask2)*/
else if (state1 == mask1 && state2 == 0) {
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
trans = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}/*if (state1 == mask1 && state2 == 0)*/
else return 0;

Expand All @@ -184,6 +182,8 @@ double complex child_CisAjt_MPIdouble(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1)return 0;

if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC) {
#pragma omp parallel for default(none) private(j, dmv, ioff) \
Expand Down Expand Up @@ -363,7 +363,7 @@ double complex child_general_hopp_MPIdouble(
double complex *tmp_v1//!<[in] v0 = H v1
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex trans, dmv, dam_pr;
Expand All @@ -385,7 +385,7 @@ double complex child_general_hopp_MPIdouble(
}
else if (state1 == mask1 && state2 == 0) {
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) trans = 0;
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -402,6 +402,8 @@ double complex child_general_hopp_MPIdouble(
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1)return 0;

dam_pr = 0.0;
#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff) \
firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0)
Expand Down Expand Up @@ -506,9 +508,7 @@ double complex child_general_hopp_MPIsingle(
else if (state2 == 0) {
state1check = mask1;
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
trans = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) return 0;
}
else return 0;

Expand Down
17 changes: 13 additions & 4 deletions src/mltplyMPIHubbardCore.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
double complex dam_pr = 0.0;
unsigned long int i_max = X->Check.idim_max;
unsigned long int idim_max_buf;
int iCheck, ierr, Fsgn;
int iCheck, ierr, Fsgn, only_send = 0;
unsigned long int isite1, isite2, isite3, isite4;
unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
unsigned long int j, Adiff, Bdiff;
Expand Down Expand Up @@ -959,7 +959,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
iFlgHermite = TRUE;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}/*if (iCheck == TRUE)*/
else return 0.0;
}/*if (iCheck == FALSE)*/
Expand Down Expand Up @@ -1019,6 +1019,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
&& org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite)
{
Expand Down Expand Up @@ -1135,7 +1138,7 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
double complex dam_pr = 0.0;
unsigned long int i_max = X->Check.idim_max;
unsigned long int idim_max_buf, ioff;
int iCheck, ierr, Fsgn;
int iCheck, ierr, Fsgn, only_send = 0;
unsigned long int isite1, isite2, isite3;
unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
unsigned long int j, Asum, Adiff;
Expand Down Expand Up @@ -1169,13 +1172,15 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
Asum = tmp_isite3 + tmp_isite4;
if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
else Adiff = tmp_isite3 - tmp_isite4 * 2;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
//printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff);
}/*if (iCheck == TRUE)*/
else return 0.0;
}/*if (iCheck == FALSE)*/

if (myrank == origin) {// only k is in PE

if (only_send == 1) return 0;
//for hermite
#pragma omp parallel default(none) reduction(+:dam_pr) \
firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1)
Expand Down Expand Up @@ -1207,6 +1212,8 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \
firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \
shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2)
Expand Down Expand Up @@ -1593,6 +1600,7 @@ double complex child_Cis_MPI(

if (state2 == mask2) {
trans = 0;
return 0;
}
else if (state2 == 0) {
trans = (double)Fsgn * tmp_trans;
Expand Down Expand Up @@ -1673,6 +1681,7 @@ double complex child_Ajt_MPI(

if (state2 == 0) {
trans = 0;
return 0;
}
else if (state2 == mask2) {
trans = (double)Fsgn * tmp_trans;
Expand Down
16 changes: 8 additions & 8 deletions src/mltplyMPISpin.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ double complex child_general_int_spin_MPIdouble(
double complex *tmp_v1//!<[in] Vector to be producted
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin;
int mask1, mask2, state1, state2, ierr, origin, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex Jint, dmv, dam_pr;
Expand All @@ -88,9 +88,7 @@ double complex child_general_int_spin_MPIdouble(
}
else if (state1 == org_ispin1 && state2 == org_ispin3) {
Jint = conj(tmp_J);
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
Jint = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -104,6 +102,8 @@ double complex child_general_int_spin_MPIdouble(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

dam_pr = 0.0;
if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \
Expand Down Expand Up @@ -232,7 +232,7 @@ double complex child_general_int_spin_MPIsingle(
double complex *tmp_v1//!<[in] Vector to be producted
) {
#ifdef MPI
int mask2, state2, ierr, origin;
int mask2, state2, ierr, origin, only_send = 0;
unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check;
MPI_Status statusMPI;
double complex Jint, dmv, dam_pr;
Expand All @@ -250,9 +250,7 @@ double complex child_general_int_spin_MPIsingle(
else if (state2 == org_ispin3) {
state1check = (unsigned long int) org_ispin1;
Jint = conj(tmp_J);
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
Jint = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -268,6 +266,8 @@ double complex child_general_int_spin_MPIsingle(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;
/*
Index in the intra PE
*/
Expand Down
4 changes: 2 additions & 2 deletions test/lanczos_spin_kagome.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1372,7 +1372,7 @@ cat > reference.dat <<EOF
EOF
paste output/zvo_cisajscktalt.dat reference.dat > paste3.dat
diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($9-$19)*($9-$19)+($10-$20)*($10-$20))}
END{printf "%8.6f", diff}' paste3.dat`
END{printf "%7.5f", diff}' paste3.dat`

test "${diff}" = "0.000000"
test "${diff}" = "0.00000"
exit $?