@@ -7006,7 +7006,7 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7006
7006
int j=0;
7007
7007
for (int i=0; i<npets; i++){
7008
7008
if (ssiLocalPetSet.find(i) != ssiLocalPetSet.end()) continue; // skip
7009
- memcpy(xferBC+outCount *size*j, inC+inCount*size*i, inCount*size);
7009
+ memcpy(xferBC+inCount *size*j, inC+inCount*size*i, inCount*size);
7010
7010
++j;
7011
7011
}
7012
7012
{
@@ -7033,7 +7033,7 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7033
7033
<< " gathered into xferSsiBuffer bytes =" << bufferSize;
7034
7034
ESMC_LogDefault.Write(msg.str(), ESMC_LOGMSG_DEBUG);
7035
7035
}
7036
- // Total exchange between SSI roots
7036
+ // Step-2: Total exchange between SSI roots
7037
7037
if (mpi_c_ssi_roots != MPI_COMM_NULL){
7038
7038
// - SSI roots prepare local xferPetMap
7039
7039
std::map<int,int> xferPetMap;
@@ -7116,6 +7116,33 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
7116
7116
MPI_Alltoallv(xferSsiSBC, &(xferInCounts[0]), &(xferInOffsets[0]),
7117
7117
mpitype, xferSsiBC, &(xferOutCounts[0]), &(xferOutOffsets[0]),
7118
7118
mpitype, mpi_c_ssi_roots);
7119
+ // - SSI roots re-arrange data for scattering to PETs on same SSI
7120
+ for (int i=0; i<ssiCount; i++){
7121
+ // data block received from PETs in SSI i
7122
+ if (i == localSsi) continue; // no self communication for local SSI
7123
+ for (int k=0; k<ssiLocalPetCounts[i]; k++){
7124
+ // data block from local PET k on SSI i to local SSI
7125
+ for (int l=0; l<ssiLocalPetCount; l++){
7126
+ // data block from local PET k on SSI i to SSI local PET l
7127
+ memcpy(xferSsiSBC+inCount*size*(npets-ssiLocalPetCount)*l
7128
+ +inCount*size*xferPetMap[ssiLocalPetLists[offsets[i]+k]],
7129
+ xferSsiBC+inCount*size*j, inCount*size);
7130
+ ++j;
7131
+ }
7132
+ }
7133
+ }
7134
+ }
7135
+ // Step-3: SSI roots scatter xfer data to PETs on their SSI
7136
+ // - SSI roots scatter xfer data to their SSI PETs from other SSI
7137
+ MPI_Scatter(xferSsiBC, (npets-ssiLocalPetCount)*inCount, mpitype,
7138
+ xferBC, (npets-ssiLocalPetCount)*outCount, mpitype, 0, mpi_c_ssi);
7139
+ // - Each PET unpacks the xferBuffer into its "out" data.
7140
+ char *outC = (char *)out;
7141
+ j=0;
7142
+ for (int i=0; i<npets; i++){
7143
+ if (ssiLocalPetSet.find(i) != ssiLocalPetSet.end()) continue; // skip
7144
+ memcpy(outC+outCount*size*i, xferBC+outCount*size*j, outCount*size);
7145
+ ++j;
7119
7146
}
7120
7147
}
7121
7148
MPI_Barrier(mpi_c); //TODO: remove once implementation done
0 commit comments