@@ -6967,6 +6967,11 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
6967
6967
#endif
6968
6968
#if 1
6969
6969
{
6970
+ {
6971
+ std::stringstream msg;
6972
+ msg << "VMK::alltoall(): hierarchical implementation inCount=" << inCount;
6973
+ ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
6974
+ }
6970
6975
// Hierarchical Alltoall implementation, with SSI roots as intermediary
6971
6976
// Step-0: SSI-local exchange via alltoallv to avoid data movements.
6972
6977
std::vector<int> inCounts(ssiLocalPetCount, inCount);
@@ -6985,6 +6990,11 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
6985
6990
if (ssiCount > 1){
6986
6991
// Multiple SSIs in the VM, under each mpi_c_ssi communicator,
6987
6992
// using task 0 as the SSI root PET below.
6993
+ {
6994
+ std::stringstream msg;
6995
+ msg << "VMK::alltoall(): ssiCount=" << ssiCount;
6996
+ ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
6997
+ }
6988
6998
// Step-1: SSI root PETs gather xfer data on their SSI.
6989
6999
// - Each PET prepares an xferBuffer to send its "in" data that is
6990
7000
// destined for PETs outside the local SSI to its SSI root PET.
@@ -6997,6 +7007,12 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
6997
7007
memcpy(xferBC+outCount*size*j, inC+inCount*size*i, inCount*size);
6998
7008
++j;
6999
7009
}
7010
+ {
7011
+ std::stringstream msg;
7012
+ msg << "VMK::alltoall(): line=" << __LINE__
7013
+ << " inCount=" << inCount;
7014
+ ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
7015
+ }
7000
7016
// - SSI roots gather xfer data from their SSI PETs toward other SSIs
7001
7017
char *xferSsiBC = NULL;
7002
7018
std::vector<char> xferSsiBuffer;
@@ -7007,8 +7023,23 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7007
7023
}
7008
7024
MPI_Gather(xferBC, (npets-ssiLocalPetCount)*inCount, mpitype,
7009
7025
xferSsiBC, (npets-ssiLocalPetCount)*outCount, mpitype, 0, mpi_c_ssi);
7026
+ {
7027
+ std::stringstream msg;
7028
+ msg << "VMK::alltoall(): line=" << __LINE__
7029
+ << " gathered into xferSsiBuffer bytes =" << (npets-ssiLocalPetCount)
7030
+ *inCount*size*ssiLocalPetCount;
7031
+ ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
7032
+ }
7010
7033
// Total exchange between SSI roots
7011
7034
if (mpi_c_ssi_roots != MPI_COMM_NULL){
7035
+ // - SSI roots prepare local xferPetMap
7036
+ std::map<int,int> xferPetMap;
7037
+ j=0;
7038
+ for (int i=0; i<npets; i++){
7039
+ if (ssiLocalPetSet.find(i) != ssiLocalPetSet.end()) continue; //skip
7040
+ xferPetMap[i]=j;
7041
+ ++j;
7042
+ }
7012
7043
// - SSI roots exchange their ssiLocalPetCount
7013
7044
std::vector<int> ssiLocalPetCounts(ssiCount);
7014
7045
MPI_Allgather(&ssiLocalPetCount, 1, MPI_INT,
@@ -7024,6 +7055,7 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7024
7055
&(ssiLocalPetLists[0]), &(ssiLocalPetCounts[0]), &(offsets[0]),
7025
7056
MPI_INT, mpi_c_ssi_roots);
7026
7057
// - SSI roots keep track of all the PETs in the other SSIs
7058
+ // -> TODO: might be obsolete
7027
7059
std::vector<std::set<int> > ssiLocalPetSets(ssiCount);
7028
7060
j=0;
7029
7061
for (int i=0; i<ssiCount; i++){
@@ -7038,12 +7070,19 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7038
7070
char *xferSsiSBC = (char *)&(xferSsiSendBuffer[0]);
7039
7071
int localSsi; // rank of local SSI's root, same as SSI index
7040
7072
MPI_Comm_rank(mpi_c_ssi_roots, &localSsi);
7073
+ {
7074
+ std::stringstream msg;
7075
+ msg << "VMK::alltoall(): line=" << __LINE__
7076
+ << " localSsi=" << localSsi;
7077
+ ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
7078
+ }
7079
+
7041
7080
std::vector<int> xferInCounts(ssiCount);
7042
7081
std::vector<int> xferInOffsets(ssiCount);
7043
7082
std::vector<int> xferOutCounts(ssiCount);
7044
7083
std::vector<int> xferOutOffsets(ssiCount);
7045
7084
xferInOffsets[0] = xferOutOffsets[0] = 0;
7046
- j=0; int jj=0;
7085
+ j=0;
7047
7086
for (int i=0; i<ssiCount; i++){
7048
7087
// prep data block to send to PETs in SSI i
7049
7088
if (i > 0){
@@ -7063,18 +7102,21 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7063
7102
// prepare block from local SSI local PET l to PET k on SSI i
7064
7103
memcpy(xferSsiSBC+inCount*size*j, xferSsiBC
7065
7104
+inCount*size*(npets-ssiLocalPetCount)*l
7066
- +inCount*size*ssiLocalPetLists[offsets[i]+k],
7105
+ +inCount*size*xferPetMap[ ssiLocalPetLists[offsets[i]+k] ],
7067
7106
inCount*size);
7068
7107
++j;
7069
7108
}
7070
7109
}
7071
7110
}
7111
+ #if 1
7072
7112
// - Alltoallv xferSsiSendBuffer -> xferSsiBuffer
7073
7113
MPI_Alltoallv(xferSsiSBC, &(xferInCounts[0]), &(xferInOffsets[0]),
7074
7114
mpitype, xferSsiBC, &(xferOutCounts[0]), &(xferOutOffsets[0]),
7075
- mpitype, mpi_c_ssi);
7115
+ mpitype, mpi_c_ssi_roots);
7116
+ #endif
7076
7117
}
7077
7118
}
7119
+ MPI_Barrier(mpi_c); //TODO: remove once implementation done
7078
7120
}
7079
7121
#endif
7080
7122
}else{
0 commit comments