diff --git a/.github/workflows/ci-cov-linux-report.yml b/.github/workflows/ci-cov-linux-report.yml index 03f4d809..1d15abac 100644 --- a/.github/workflows/ci-cov-linux-report.yml +++ b/.github/workflows/ci-cov-linux-report.yml @@ -37,7 +37,7 @@ jobs: ci-cov-linux-report.sh PCM.linux.and.python - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: coverity-linux-and-python-report-${{ github.sha }} path: "*-Report.pdf" diff --git a/.github/workflows/ci-cov-windows-report.yml b/.github/workflows/ci-cov-windows-report.yml index 11338e72..9d4d871d 100644 --- a/.github/workflows/ci-cov-windows-report.yml +++ b/.github/workflows/ci-cov-windows-report.yml @@ -48,7 +48,7 @@ jobs: c:\pcm\ci-cov-windows-report.ps1 PCM.windows-all - name: upload-artifact - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: coverity-windows-all-report-${{ github.sha }} path: "*-Report.pdf" diff --git a/.github/workflows/ci-gcc5.yml b/.github/workflows/ci-gcc5.yml index b9682c44..109d620c 100644 --- a/.github/workflows/ci-gcc5.yml +++ b/.github/workflows/ci-gcc5.yml @@ -43,7 +43,7 @@ jobs: cd ${{ github.workspace }}/build make install -j$(nproc) - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: PCMforLinuxGCC5 path: build/bin/* diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index bc0fe729..b18a7289 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -40,55 +40,55 @@ jobs: sh ${{ github.workspace }}/tests/test.sh 2>&1 | tee test-log.txt - name: upload-artifact - uses: 
actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-${{ github.sha }} path: test-log.txt - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-tr-wo_ext-${{ github.sha }} path: build/bin/raw_tr_wo_ext.csv - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-tr-wi_ext-${{ github.sha }} path: build/bin/raw_tr_wi_ext.csv - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: pcm-csv-${{ github.sha }} path: build/bin/pcm.csv - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: pcm-memory-csv-${{ github.sha }} path: build/bin/pcm-memory.csv - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-tr-wi_ext-single_header-${{ github.sha }} path: build/bin/raw_tr_wi_ext_single_header.csv - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-edp-${{ github.sha }} path: build/bin/raw_edp.txt - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: 
actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-json-${{ github.sha }} path: build/bin/raw_json.json - name: upload-artifact - uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: test-log-raw-edp-offlined-cores-${{ github.sha }} path: build/bin/raw_edp_offlined_cores.txt diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index 8d6644d7..9854bb3a 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -41,7 +41,7 @@ jobs: chdir ${{github.workspace}}\src\WinMSRDriver msbuild MSR.vcxproj /p:Configuration=Release,Platform=x64 /t:Clean,Build /m - name: upload-artifact - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: PCMforWindows path: build/bin/**/* diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index f621a959..5e6f50bb 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -50,7 +50,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 + uses: github/codeql-action/init@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -60,7 +60,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 + uses: github/codeql-action/autobuild@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -73,6 +73,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 + uses: github/codeql-action/analyze@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index f4a8bb47..e82880c5 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -26,4 +26,4 @@ jobs: - name: 'Checkout Repository' uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: 'Dependency Review' - uses: actions/dependency-review-action@c74b580d73376b7750d3d2a50bfb8adc2c937507 # v3.1.5 + uses: actions/dependency-review-action@9129d7d40b8c12c1ed0f60400d00c92d437adcce # v4.1.3 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index f5c4a211..0e0a81b3 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -37,7 +37,7 @@ jobs: uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 - name: Cache Docker layers - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: /tmp/.buildx-cache key: ${{ runner.os }}-buildx-${{ github.sha }} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index ccc14d78..78efbce2 100644 --- 
a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -65,7 +65,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 + uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 with: name: SARIF file path: results.sarif @@ -73,6 +73,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 + uses: github/codeql-action/upload-sarif@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3 with: sarif_file: results.sarif diff --git a/doc/FAQ.md b/doc/FAQ.md index cd51f5ff..88a0266d 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -86,3 +86,10 @@ Not all AWS instances allow users to collect CPU telemetry by exposing PMU to th * Bare metal instances: allow collection of CPU metrics from both core (e.g. instructions per cycle, cache misses) and uncore (e.g. memory controller, UPI) * Full-socket (single socket, two socket, etc) virtualized instances: e.g. m5d.12xlarge, m5.24xlarge, m5.12xlarge. Only core CPU metrics are exposed, and certain CPU performance events are forbidden (e.g. offcore response events, events collecting “any_thread” information). “arch_perfmon” flag in /proc/cpuinfo indicates if the core CPU metrics are exposed (example: https://instaguide.io/info.html?type=m5.12xlarge ). The mechanism of PMU virtualization is commonly known as vPMU. +## Q12 + +pcm-pcie reports that the CPU is not supported: "Jaketown, Ivytown, Haswell, Broadwell-DE, Skylake, Icelake, Snowridge and Sapphirerapids Server CPU is required for this tool! Program aborted" +Can you add support for pcm-pcie for my CPU? 
+ +Answer: most likely you have a client CPU, which does not have the required hardware performance monitoring units. pcm-pcie cannot work without them. + diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100644 index 00000000..2049a90c --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,7 @@ + + +mkdir build +cd build +cmake .. +make -j + diff --git a/scripts/debug-build.sh b/scripts/debug-build.sh new file mode 100644 index 00000000..a89e2a48 --- /dev/null +++ b/scripts/debug-build.sh @@ -0,0 +1,7 @@ + + +mkdir debug-build +cd debug-build +cmake -DCMAKE_BUILD_TYPE=Debug .. +make -j + diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 9e7158eb..e76797c5 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -6857,6 +6857,55 @@ bool PCM::useLinuxPerfForUncore() const return 1 == use; } +template +void PCM::getPCICFGPMUsFromDiscovery(const unsigned int BoxType, const size_t s, F f) const +{ + if (uncorePMUDiscovery.get()) + { + const auto numBoxes = uncorePMUDiscovery->getNumBoxes(BoxType, s); + for (size_t pos = 0; pos < numBoxes; ++pos) + { + if (uncorePMUDiscovery->getBoxAccessType(BoxType, s, pos) == UncorePMUDiscovery::accessTypeEnum::PCICFG) + { + std::vector > CounterControlRegs, CounterValueRegs; + const auto n_regs = uncorePMUDiscovery->getBoxNumRegs(BoxType, s, pos); + auto makeRegister = [](const uint64 rawAddr) + { +#ifndef PCI_ENABLE + constexpr auto PCI_ENABLE = 0x80000000ULL; +#endif + UncorePMUDiscovery::PCICFGAddress Addr; + Addr.raw = rawAddr; + assert(Addr.raw & PCI_ENABLE); + try { + auto handle = std::make_shared(0, (uint32)Addr.fields.bus, + (uint32)Addr.fields.device, + (uint32)Addr.fields.function); + assert(handle.get()); + // std::cerr << "DEBUG: opened bdf "<< Addr.getStr() << "\n"; + return std::make_shared(handle, (size_t)Addr.fields.offset); + } + catch (...)
+ { + // std::cerr << "DEBUG: error opening bdf "<< Addr.getStr() << "\n"; + } + return std::shared_ptr(); + }; + auto boxCtlRegister = makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, s, pos)); + if (boxCtlRegister.get()) + { + for (size_t r = 0; r < n_regs; ++r) + { + CounterControlRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, s, pos, r))); + CounterValueRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtrAddr(BoxType, s, pos, r))); + } + f(UncorePMU(boxCtlRegister, CounterControlRegs, CounterValueRegs)); + } + } + } + } +}; + ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) : iMCbus(-1) , UPIbus(-1) @@ -7293,9 +7342,12 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) populateM2MPMUs(groupnr, M2Mbus, cpu_model, HBM_M2MRegisterLocation, hbm_m2mPMUs); int numChannels = 0; - if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + if (safe_getenv("PCM_NO_IMC_DISCOVERY") == std::string("1")) { - numChannels = 3; + if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) + { + numChannels = 3; + } } if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX) { @@ -7352,6 +7404,75 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } } + else + { + switch (cpu_model) + { + case PCM::SPR: + case PCM::EMR: + { + auto & uncorePMUDiscovery = pcm->uncorePMUDiscovery; + const auto BoxType = SPR_IMC_BOX_TYPE; + if (uncorePMUDiscovery.get()) + { + const auto numBoxes = uncorePMUDiscovery->getNumBoxes(BoxType, socket_); + for (size_t pos = 0; pos < numBoxes; ++pos) + { + if (uncorePMUDiscovery->getBoxAccessType(BoxType, socket_, pos) == UncorePMUDiscovery::accessTypeEnum::MMIO) + { + std::vector > CounterControlRegs, CounterValueRegs; + const auto n_regs = uncorePMUDiscovery->getBoxNumRegs(BoxType, socket_, pos); + auto makeRegister = [](const uint64 rawAddr, const uint32 bits) -> std::shared_ptr + { + const auto mapSize = SERVER_MC_CH_PMON_SIZE; + const auto alignedAddr = rawAddr & ~4095ULL; 
+ const auto alignDelta = rawAddr & 4095ULL; + try { + auto handle = std::make_shared(alignedAddr, mapSize, false); + assert(handle.get()); + switch (bits) + { + case 32: + return std::make_shared(handle, (size_t)alignDelta); + case 64: + return std::make_shared(handle, (size_t)alignDelta); + } + } + catch (...) + { + } + return std::shared_ptr(); + }; + + auto boxCtlRegister = makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos), 32); + if (boxCtlRegister.get()) + { + for (size_t r = 0; r < n_regs; ++r) + { + CounterControlRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos, r), 32)); + CounterValueRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtrAddr(BoxType, socket_, pos, r), 64)); + } + imcPMUs.push_back(UncorePMU(boxCtlRegister, + CounterControlRegs, + CounterValueRegs, + makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos) + SERVER_MC_CH_PMON_FIXED_CTL_OFFSET, 32), + makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos) + SERVER_MC_CH_PMON_FIXED_CTR_OFFSET, 64))); + } + } + } + } + if (imcPMUs.empty() == false) + { + numChannels = 2; + for (size_t c = 0; c < imcPMUs.size(); c += numChannels) + { + num_imc_channels.push_back(numChannels); + } + } + } + break; + } + } if (imcPMUs.empty()) { @@ -7500,11 +7621,28 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) return; #endif + if (pcm->getNumSockets() <= 4 && safe_getenv("PCM_NO_UPILL_DISCOVERY") != std::string("1")) + { + switch (cpu_model) + { + case PCM::SPR: + case PCM::EMR: + { + std::cerr << "INFO: Trying to detect UPILL PMU through uncore PMU discovery..\n"; + pcm->getPCICFGPMUsFromDiscovery(SPR_UPILL_BOX_TYPE, socket_, [this](const UncorePMU & pmu) + { + xpiPMUs.push_back(pmu); + }); + } + break; + } + } + std::vector > qpiLLHandles; auto xPI = pcm->xPI(); try { - for (size_t i = 0; i < XPIRegisterLocation.size(); ++i) + if (xpiPMUs.empty()) for (size_t i = 0; i < XPIRegisterLocation.size(); 
++i) { PciHandleType * handle = createIntelPerfMonDevice(groupnr, UPIbus, XPIRegisterLocation[i].first, XPIRegisterLocation[i].second, true); if (handle) @@ -7530,7 +7668,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) throw std::exception(); } - for (auto & handle : qpiLLHandles) + if (xpiPMUs.empty()) for (auto & handle : qpiLLHandles) { if (cpu_model == PCM::SKX) { diff --git a/src/cpucounters.h b/src/cpucounters.h index 31286338..db5bbaa3 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -804,6 +804,9 @@ class PCM_API PCM std::shared_ptr uncorePMUDiscovery; + template + void getPCICFGPMUsFromDiscovery(const unsigned int BoxType, const size_t s, F f) const; + bool disable_JKT_workaround; bool blocked; // track if time-driven counter update is running or not: PCM is blocked diff --git a/src/memoptest.cpp b/src/memoptest.cpp index 04d36140..dda78042 100644 --- a/src/memoptest.cpp +++ b/src/memoptest.cpp @@ -79,7 +79,7 @@ int main(int argc, char * argv[]) assert((argc > 1) && "Need operation type as parameter: 0 - read, 1 - write, 2 - streaming write "); int op = atoi(argv[1]); T * vector; - int nelements = 13000000; + int nelements = 1024 * 1024 * 1024 / sizeof(T); vector = new T[nelements]; int i = 0; diff --git a/src/uncore_pmu_discovery.h b/src/uncore_pmu_discovery.h index dc050fa2..284324df 100644 --- a/src/uncore_pmu_discovery.h +++ b/src/uncore_pmu_discovery.h @@ -11,6 +11,8 @@ namespace pcm { constexpr auto SPR_PCU_BOX_TYPE = 4U; +constexpr auto SPR_IMC_BOX_TYPE = 6U; +constexpr auto SPR_UPILL_BOX_TYPE = 8U; constexpr auto SPR_MDF_BOX_TYPE = 11U; constexpr auto SPR_CXLCM_BOX_TYPE = 12U; constexpr auto SPR_CXLDP_BOX_TYPE = 13U; @@ -185,7 +187,8 @@ class UncorePMUDiscovery { if (validBox(boxType, socket, pos) && c < boxPMUs[socket][boxType][pos].numRegs) { - return boxPMUs[socket][boxType][pos].boxCtrlAddr + boxPMUs[socket][boxType][pos].ctrlOffset + c * registerStep(boxType, socket, pos); + const size_t step = (boxType == 
SPR_IMC_BOX_TYPE) ? 4 : registerStep(boxType, socket, pos); + return boxPMUs[socket][boxType][pos].boxCtrlAddr + boxPMUs[socket][boxType][pos].ctrlOffset + c * step; } return 0; }