@@ -476,9 +476,10 @@ int main(int argc, char** argv)
476
476
477
477
utils::mpi::MPIComm comm = utils::mpi::MPICommWorld;
478
478
479
- utils::mpi::MPIBarrier (comm);
480
- auto startTotal = std::chrono::high_resolution_clock::now ();
481
-
479
+ utils::Profiler pTot (comm, " Total Statistics" );
480
+ utils::Profiler p (comm, " Initilization Breakdown Statistics" );
481
+ pTot.registerStart (" Initilization" );
482
+
482
483
// Get the rank of the process
483
484
int rank;
484
485
utils::mpi::MPICommRank (comm, &rank);
@@ -718,10 +719,7 @@ int main(int argc, char** argv)
718
719
719
720
// Set up the quadrature rule
720
721
721
- // add device synchronize for gpu
722
- utils::mpi::MPIBarrier (comm);
723
- auto start = std::chrono::high_resolution_clock::now ();
724
-
722
+ p.registerStart (" Basis Creation and Basis Data Storages Evaluation" );
725
723
quadrature::QuadratureRuleAttributes quadAttrElec (quadrature::QuadratureFamily::GAUSS,true ,feOrderElec+1 );
726
724
727
725
basis::BasisStorageAttributesBoolMap basisAttrMap;
@@ -949,16 +947,17 @@ std::shared_ptr<linearAlgebra::OperatorContext<double,
949
947
setCellValues<Host>(iCell, b);
950
948
}
951
949
952
- // add device synchronize for gpu
953
- utils::mpi::MPIBarrier (comm);
954
- auto stop = std::chrono::high_resolution_clock::now ();
955
-
956
- auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
957
-
958
- rootCout << " Time for all basis storage evaluations including overlap operators(in secs) : " << duration.count ()/1e6 << std::endl;
950
+ p.registerEnd (" Basis Creation and Basis Data Storages Evaluation" );
959
951
960
952
rootCout << " Entering KohnSham DFT Class....\n\n " ;
961
953
954
+ p.registerStart (" Kohn Sham DFT Class Init" );
955
+ std::shared_ptr<ksdft::KohnShamDFT<double ,
956
+ double ,
957
+ double ,
958
+ double ,
959
+ Host,
960
+ dim>> dftefeSolve = nullptr ;
962
961
if (isNumericalNuclearSolve)
963
962
{
964
963
@@ -977,12 +976,7 @@ std::shared_ptr<linearAlgebra::OperatorContext<double,
977
976
978
977
std::shared_ptr<const basis::FEBasisDataStorage<double ,Host>> feBDNuclearChargeStiffnessMatrix = feBDTotalChargeStiffnessMatrix;
979
978
980
- std::shared_ptr<ksdft::KohnShamDFT<double ,
981
- double ,
982
- double ,
983
- double ,
984
- Host,
985
- dim>> dftefeSolve =
979
+ dftefeSolve =
986
980
std::make_shared<ksdft::KohnShamDFT<double ,
987
981
double ,
988
982
double ,
@@ -1021,31 +1015,11 @@ std::shared_ptr<linearAlgebra::OperatorContext<double,
1021
1015
*MContextForInv,
1022
1016
/* *MContextForInv,*/
1023
1017
*MContext,
1024
- *MInvContext);
1025
-
1026
- // add device synchronize for gpu
1027
- utils::mpi::MPIBarrier (comm);
1028
- start = std::chrono::high_resolution_clock::now ();
1029
-
1030
- dftefeSolve->solve ();
1031
-
1032
- // add device synchronize for gpu
1033
- utils::mpi::MPIBarrier (comm);
1034
- stop = std::chrono::high_resolution_clock::now ();
1035
-
1036
- duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
1037
-
1038
- rootCout << " Time for scf iterations is(in secs) : " << duration.count ()/1e6 << std::endl;
1039
-
1018
+ *MInvContext);
1040
1019
}
1041
1020
else
1042
1021
{
1043
- std::shared_ptr<ksdft::KohnShamDFT<double ,
1044
- double ,
1045
- double ,
1046
- double ,
1047
- Host,
1048
- dim>> dftefeSolve =
1022
+ dftefeSolve =
1049
1023
std::make_shared<ksdft::KohnShamDFT<double ,
1050
1024
double ,
1051
1025
double ,
@@ -1083,31 +1057,18 @@ std::shared_ptr<linearAlgebra::OperatorContext<double,
1083
1057
/* *MContextForInv,*/
1084
1058
*MContext,
1085
1059
*MInvContext);
1086
-
1087
- // add device synchronize for gpu
1088
- utils::mpi::MPIBarrier (comm);
1089
- start = std::chrono::high_resolution_clock::now ();
1090
-
1091
- dftefeSolve->solve ();
1092
-
1093
- // add device synchronize for gpu
1094
- utils::mpi::MPIBarrier (comm);
1095
- stop = std::chrono::high_resolution_clock::now ();
1096
-
1097
- duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
1098
-
1099
- rootCout << " Time for scf iterations is(in secs) : " << duration.count ()/1e6 << std::endl;
1100
-
1101
1060
}
1061
+ p.registerEnd (" Kohn Sham DFT Class Init" );
1062
+ p.print ();
1102
1063
1103
- // add device synchronize for gpu
1104
- utils::mpi::MPIBarrier (comm);
1105
- auto stopTotal = std::chrono::high_resolution_clock::now ();
1106
-
1107
- auto durationTotal = std::chrono::duration_cast<std::chrono::microseconds>(stopTotal - startTotal);
1064
+ pTot.registerEnd (" Initilization" );
1065
+ pTot.registerStart (" Kohn Sham DFT Solve" );
1108
1066
1109
- rootCout << " Total wall time(in secs) : " << durationTotal. count ()/ 1e6 << std::endl ;
1067
+ dftefeSolve-> solve () ;
1110
1068
1069
+ pTot.registerEnd (" Kohn Sham DFT Solve" );
1070
+ pTot.print ();
1071
+
1111
1072
// gracefully end MPI
1112
1073
1113
1074
int mpiFinalFlag = 0 ;
0 commit comments