Skip to content

Commit b61764f

Browse files
committed
Activate first portion of hierarchical Alltoall() implementation for
preliminary performance check.
1 parent 1cbb568 commit b61764f

File tree

1 file changed

+114
-2
lines changed

1 file changed

+114
-2
lines changed

src/Infrastructure/VM/src/ESMCI_VMKernel.C

+114-2
Original file line numberDiff line numberDiff line change
@@ -6959,11 +6959,123 @@ int VMK::alltoall(void *in, int inCount, void *out, int outCount,
69596959
&req);
69606960
MPI_Wait(&req, MPI_STATUS_IGNORE);
69616961
#endif
6962-
#if 1
6962+
#if 0
69636963
for (int i=0; i<npets; i++){
69646964
MPI_Scatter(in, inCount, mpitype, out+outCount*i*size, outCount, mpitype,
69656965
i, mpi_c);
69666966
}
6967+
#endif
6968+
#if 1
6969+
{
6970+
// Hierarchical Alltoall implementation, with SSI roots as intermediary
6971+
// Step-0: SSI-local exchange via alltoallv to avoid data movements.
6972+
std::vector<int> inCounts(ssiLocalPetCount, inCount);
6973+
std::vector<int> outCounts(ssiLocalPetCount, outCount);
6974+
std::vector<int> inOffsets(ssiLocalPetCount);
6975+
std::vector<int> outOffsets(ssiLocalPetCount);
6976+
std::set<int> ssiLocalPetSet;
6977+
for (int i=0; i<ssiLocalPetCount; i++){
6978+
ssiLocalPetSet.insert(ssiLocalPetList[i]);
6979+
inOffsets[i] = ssiLocalPetList[i] * inCount;
6980+
outOffsets[i] = ssiLocalPetList[i] * outCount;
6981+
}
6982+
MPI_Alltoallv(in, &(inCounts[0]), &(inOffsets[0]), mpitype,
6983+
out, &(outCounts[0]), &(outOffsets[0]), mpitype, mpi_c_ssi);
6984+
// Only go up the hierarchy if there is more than one SSI in the VM
6985+
if (ssiCount > 1){
6986+
// Multiple SSIs in the VM, under each mpi_c_ssi communicator,
6987+
// using task 0 as the SSI root PET below.
6988+
// Step-1: SSI root PETs gather xfer data on their SSI.
6989+
// - Each PET prepares an xferBuffer to send its "in" data that is
6990+
// destined for PETs outside the local SSI to its SSI root PET.
6991+
std::vector<char> xferBuffer((npets-ssiLocalPetCount)*inCount*size);
6992+
char *xferBC = (char *)&(xferBuffer[0]);
6993+
char *inC = (char *)in;
6994+
int j=0;
6995+
for (int i=0; i<npets; i++){
6996+
if (ssiLocalPetSet.find(i) != ssiLocalPetSet.end()) continue; // skip
6997+
memcpy(xferBC+outCount*size*j, inC+inCount*size*i, inCount*size);
6998+
++j;
6999+
}
7000+
// - SSI roots gather xfer data from their SSI PETs toward other SSI
7001+
char *xferSsiBC = NULL;
7002+
std::vector<char> xferSsiBuffer;
7003+
if (mpi_c_ssi_roots != MPI_COMM_NULL){
7004+
xferSsiBuffer.resize((npets-ssiLocalPetCount)
7005+
*inCount*size*ssiLocalPetCount);
7006+
xferSsiBC = (char *)&(xferSsiBuffer[0]);
7007+
}
7008+
MPI_Gather(xferBC, (npets-ssiLocalPetCount)*inCount, mpitype,
7009+
xferSsiBC, (npets-ssiLocalPetCount)*outCount, mpitype, 0, mpi_c_ssi);
7010+
// Total exchange between SSI roots
7011+
if (mpi_c_ssi_roots != MPI_COMM_NULL){
7012+
// - SSI roots exchange their ssiLocalPetCount
7013+
std::vector<int> ssiLocalPetCounts(ssiCount);
7014+
MPI_Allgather(&ssiLocalPetCount, 1, MPI_INT,
7015+
&(ssiLocalPetCounts[0]), 1, MPI_INT, mpi_c_ssi_roots);
7016+
// - Construct offsets array to match the received ssiLocalPetCounts
7017+
std::vector<int> offsets(ssiCount);
7018+
offsets[0] = 0;
7019+
for (int i=1; i<ssiCount; i++)
7020+
offsets[i] = offsets[i-1] + ssiLocalPetCounts[i-1];
7021+
// - SSI roots exchange their ssiLocalPetList information
7022+
std::vector<int> ssiLocalPetLists(npets);
7023+
MPI_Allgatherv(ssiLocalPetList, ssiLocalPetCount, MPI_INT,
7024+
&(ssiLocalPetLists[0]), &(ssiLocalPetCounts[0]), &(offsets[0]),
7025+
MPI_INT, mpi_c_ssi_roots);
7026+
// - SSI roots keep track of all the PETs in the other SSIs
7027+
std::vector<std::set<int> > ssiLocalPetSets(ssiCount);
7028+
j=0;
7029+
for (int i=0; i<ssiCount; i++){
7030+
for (int k=0; k<ssiLocalPetCounts[i]; k++){
7031+
ssiLocalPetSets[i].insert(ssiLocalPetLists[j]);
7032+
++j;
7033+
}
7034+
}
7035+
// - SSI roots collate data into SSI blocks for sending
7036+
std::vector<char> xferSsiSendBuffer((npets-ssiLocalPetCount)
7037+
*inCount*size*ssiLocalPetCount);
7038+
char *xferSsiSBC = (char *)&(xferSsiSendBuffer[0]);
7039+
int localSsi; // rank of local SSI's root, same as SSI index
7040+
MPI_Comm_rank(mpi_c_ssi_roots, &localSsi);
7041+
std::vector<int> xferInCounts(ssiCount);
7042+
std::vector<int> xferInOffsets(ssiCount);
7043+
std::vector<int> xferOutCounts(ssiCount);
7044+
std::vector<int> xferOutOffsets(ssiCount);
7045+
xferInOffsets[0] = xferOutOffsets[0] = 0;
7046+
j=0; int jj=0;
7047+
for (int i=0; i<ssiCount; i++){
7048+
// prep data block to send to PETs in SSI i
7049+
if (i > 0){
7050+
xferInOffsets[i] = xferInOffsets[i-1] + xferInCounts[i-1];
7051+
xferOutOffsets[i] = xferOutOffsets[i-1] + xferOutCounts[i-1];
7052+
}
7053+
if (i == localSsi){
7054+
xferInCounts[i] = xferOutCounts[i] = 0;
7055+
continue; // no self communication to local SSI
7056+
}else{
7057+
xferInCounts[i] = ssiLocalPetCounts[i]*ssiLocalPetCount*inCount;
7058+
xferOutCounts[i] = ssiLocalPetCounts[i]*ssiLocalPetCount*outCount;
7059+
}
7060+
for (int k=0; k<ssiLocalPetCounts[i]; k++){
7061+
// prepare block to SSI local PET k on SSI i
7062+
for (int l=0; l<ssiLocalPetCount; l++){
7063+
// prepare block from local SSI local PET l to PET k on SSI i
7064+
memcpy(xferSsiSBC+inCount*size*j, xferSsiBC
7065+
+inCount*size*(npets-ssiLocalPetCount)*l
7066+
+inCount*size*ssiLocalPetLists[offsets[i]+k],
7067+
inCount*size);
7068+
++j;
7069+
}
7070+
}
7071+
}
7072+
// - Alltoallv xferSsiSendBuffer -> xferSsiBuffer
7073+
MPI_Alltoallv(xferSsiSBC, &(xferInCounts[0]), &(xferInOffsets[0]),
7074+
mpitype, xferSsiBC, &(xferOutCounts[0]), &(xferOutOffsets[0]),
7075+
mpitype, mpi_c_ssi);
7076+
}
7077+
}
7078+
}
69677079
#endif
69687080
}else{
69697081
// This is a very simplistic, probably very bad performance implementation.
@@ -7043,7 +7155,7 @@ int VMK::alltoallv(void *in, int *inCounts, int *inOffsets, void *out,
70437155
mpitype = MPI_LOGICAL;
70447156
break;
70457157
}
7046-
#if 0
7158+
#if 1
70477159
localrc = MPI_Alltoallv(in, inCounts, inOffsets, mpitype, out, outCounts,
70487160
outOffsets, mpitype, mpi_c);
70497161
#else

0 commit comments

Comments
 (0)