From 6ef66c45b754e3dd8b0ddc4f6de8fb7971215e1a Mon Sep 17 00:00:00 2001 From: opcm Date: Tue, 7 May 2024 19:32:21 +0200 Subject: [PATCH 01/30] Update appveyor.yml --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index e4783e48..aa1dd482 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -15,7 +15,7 @@ build_script: - cmake --build build --config Release --parallel after_build: -- cmd: 7z a pcm-all.zip %APPVEYOR_BUILD_FOLDER%\build\bin\Release\*.exe %APPVEYOR_BUILD_FOLDER%\build\bin\Release\*.dll %APPVEYOR_BUILD_FOLDER%\build\src\Release\*.lib %APPVEYOR_BUILD_FOLDER%\build\src\Release\*.exp +- cmd: 7z a pcm-all.zip %APPVEYOR_BUILD_FOLDER%\build\bin\Release\*.exe %APPVEYOR_BUILD_FOLDER%\build\bin\Release\*.dll %APPVEYOR_BUILD_FOLDER%\build\src\Release\*.lib %APPVEYOR_BUILD_FOLDER%\build\src\Release\*.exp %APPVEYOR_BUILD_FOLDER%\src\windows\PCM-Service.exe.config artifacts: - path: pcm-all.zip - name: pcm-all \ No newline at end of file + name: pcm-all From 032a0af07d3948acc22daaf605bf596be0a0e939 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 10 May 2024 14:26:09 +0200 Subject: [PATCH 02/30] Add initial SRF support Co-authored-by: Alexander Antonov Co-authored-by: Dementiev, Roman Co-authored-by: Gang Deng Co-authored-by: Pavithran Pandiyan Change-Id: Ie80bbe9195cef7d928269ea58c1632592cc43716 --- scripts/bhs-die-stat.sh | 44 ++ src/CMakeLists.txt | 2 +- .../GenuineIntel-6-AF-0.json | 145 ++++ src/client/main.cpp | 2 +- src/cpucounters.cpp | 692 ++++++++++++++---- src/cpucounters.h | 214 +++++- src/daemon/common.h | 4 +- src/daemon/daemon.cpp | 2 +- src/dashboard.cpp | 32 + src/mmio.cpp | 45 +- src/mmio.h | 6 +- src/pcm-iio.cpp | 63 +- src/pcm-lspci.cpp | 8 +- src/pcm-memory.cpp | 228 ++++-- src/pcm-pcie.cpp | 2 + src/pcm-pcie.h | 132 ++++ src/pcm-raw.cpp | 2 + src/pcm-sensor-server.cpp | 14 + src/pcm-tpmi.cpp | 119 ++- src/pcm.cpp | 75 +- src/tpmi.cpp | 439 +++++++++++ src/tpmi.h | 40 + 
src/types.h | 125 ++++ src/uncore_pmu_discovery.h | 3 + 24 files changed, 2096 insertions(+), 342 deletions(-) create mode 100644 scripts/bhs-die-stat.sh create mode 100644 src/PMURegisterDeclarations/GenuineIntel-6-AF-0.json create mode 100644 src/tpmi.cpp create mode 100644 src/tpmi.h diff --git a/scripts/bhs-die-stat.sh b/scripts/bhs-die-stat.sh new file mode 100644 index 00000000..ecff3fc7 --- /dev/null +++ b/scripts/bhs-die-stat.sh @@ -0,0 +1,44 @@ +#!/bin/bash + + +echo "Intel(r) Performance Counter Monitor" +echo "Birch Stream Die Statistics Utility" +echo + +# Run the pcm-tpmi command and store the output +output=$(pcm-tpmi 2 0x10 -d) + +# Use a while loop to read each line of the output +echo "$output" | while read -r line; do + # Check if the line contains "Read value" + if [[ $line =~ Read\ value\ ([0-9]+)\ from\ TPMI\ ID\ 2@16\ for\ entry\ ([0-9]+)\ in\ instance\ ([0-9]+) ]]; then + # Extract the value using BASH_REMATCH + value=${BASH_REMATCH[1]} + die=${BASH_REMATCH[2]} + socket=${BASH_REMATCH[3]} + + freq=$(( (value & 0x7F) * 100 )) + compute=$(( (value >> 23) & 1 )) + llc=$(( (value >> 24) & 1 )) + memory=$(( (value >> 25) & 1 )) + io=$(( (value >> 26) & 1 )) + + die_type="" + if [ "$compute" -ne 0 ]; then + die_type="compute/" + fi + if [ "$llc" -ne 0 ]; then + die_type="${die_type}LLC/" + fi + if [ "$memory" -ne 0 ]; then + die_type="${die_type}memory/" + fi + if [ "$io" -ne 0 ]; then + die_type="${die_type}IO" + fi + die_type="${die_type%"${die_type##*[!\/]}"}" + str="Socket $socket die $die ($die_type) uncore frequency" + printf "%-60s: %d MHz\n" "$str" "$freq" + fi +done + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 050c5398..fbce37c4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ # All pcm-* executables set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) -file(GLOB COMMON_SOURCES 
pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) +file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp tpmi.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) if (APPLE) file(GLOB UNUX_SOURCES dashboard.cpp) diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-AF-0.json b/src/PMURegisterDeclarations/GenuineIntel-6-AF-0.json new file mode 100644 index 00000000..e3b4a8ed --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-AF-0.json @@ -0,0 +1,145 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + "OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": 
{"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": 
{"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + 
"UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex8" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex16" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/client/main.cpp b/src/client/main.cpp index 23095bd0..cd842d8e 100644 --- a/src/client/main.cpp +++ b/src/client/main.cpp @@ -281,7 +281,7 @@ int main(int argc, char * argv[]) printTitle("PMM Memory Mode hit rate p/Sock. 
"); for (PCMDaemon::uint32 i = 0; i < counters.system.numOfOnlineSockets; ++i) { - std::cout << std::setprecision(coutPrecision) << counters.memory.sockets[i].pmmMemoryModeHitRate << " "; + std::cout << std::setprecision(coutPrecision) << counters.memory.sockets[i].memoryModeHitRate << " "; } std::cout << "\n"; } diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index ec6e5191..21e341d6 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -6,6 +6,7 @@ // Pat Fay // Austen Ott // Jim Harris (FreeBSD) +// and many others /*! \file cpucounters.cpp \brief The bulk of PCM implementation @@ -723,6 +724,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case SRF: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: case BROADWELL: @@ -775,6 +777,7 @@ void PCM::initCStateSupportTables() case ICX: case SPR: case EMR: + case SRF: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0, 0, 0, 0x3FF, 0, 0, 0, 0}) ); @@ -1625,6 +1628,7 @@ bool PCM::detectNominalFrequency() || cpu_model == ICX || cpu_model == SPR || cpu_model == EMR + || cpu_model == SRF ) ? (100000000ULL) : (133333333ULL); nominal_frequency = ((freq >> 8) & 255) * bus_freq; @@ -1914,26 +1918,34 @@ void PCM::initUncoreObjects() #endif } } - if (cpu_model == ICX || cpu_model == SNOWRIDGE || cpu_model == SPR || cpu_model == EMR) + switch (cpu_model) { - bool failed = false; - try - { - initSocket2Ubox0Bus(); - } - catch (std::exception & e) - { - std::cerr << e.what() << "\n"; - failed = true; - } - catch (...) - { - failed = true; - } - if (failed) + case ICX: + case SNOWRIDGE: + case SPR: + case EMR: + case SRF: { - std::cerr << "Can not read PCI configuration space bus mapping. 
Access to uncore counters is disabled.\n"; + bool failed = false; + try + { + initSocket2Ubox0Bus(); + } + catch (std::exception& e) + { + std::cerr << e.what() << "\n"; + failed = true; + } + catch (...) + { + failed = true; + } + if (failed) + { + std::cerr << "Can not read PCI configuration space bus mapping. Access to uncore counters is disabled.\n"; + } } + break; } if (cpu_model == ICX || cpu_model == SNOWRIDGE) { @@ -1957,6 +1969,28 @@ void PCM::initUncoreObjects() initUncorePMUsDirect(); } + // TPMIHandle::setVerbose(true); + if (TPMIHandle::getNumInstances() == (size_t)num_sockets) + { + // std::cerr << "DEBUG: TPMIHandle::getNumInstances(): " << TPMIHandle::getNumInstances() << "\n"; + UFSStatus.resize(num_sockets); + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + try { + TPMIHandle h(s, UFS_ID, UFS_FABRIC_CLUSTER_OFFSET * sizeof(uint64)); + // std::cerr << "DEBUG: Socket " << s << " dies: " << h.getNumEntries() << "\n"; + for (size_t die = 0; die < h.getNumEntries(); ++die) + { + const auto clusterOffset = extract_bits(h.read64(die), 0, 7); + UFSStatus[s].push_back(std::make_shared(s, UFS_ID, (clusterOffset + UFS_STATUS)* sizeof(uint64))); + } + } catch (std::exception & ) + { + std::cerr << "ERROR: Could not open UFS TPMI register on socket " << s << ". Uncore frequency metrics will be unavailable.\n"; + } + } + } + for (uint32 s = 0; s < (uint32)num_sockets; ++s) { std::cerr << "Socket " << s << ":" << @@ -1967,6 +2001,8 @@ void PCM::initUncoreObjects() " " << getMaxNumOfUncorePMUs(MDF_PMU_ID, s) << " MDF units detected." " " << getMaxNumOfUncorePMUs(UBOX_PMU_ID, s) << " UBOX units detected." " " << ((s < cxlPMUs.size()) ? cxlPMUs[s].size() : 0) << " CXL units detected." + " " << getMaxNumOfUncorePMUs(PCIE_GEN5x16_PMU_ID, s) << " PCIE_GEN5x16 units detected." + " " << getMaxNumOfUncorePMUs(PCIE_GEN5x8_PMU_ID, s) << " PCIE_GEN5x8 units detected." 
"\n"; } } @@ -2019,8 +2055,10 @@ void PCM::initUncorePMUsDirect() // unfreeze uncore PMUs globalUnfreezeUncoreCounters(); - if (IVYTOWN == cpu_model || JAKETOWN == cpu_model) + switch (cpu_model) { + case IVYTOWN: + case JAKETOWN: uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2041,9 +2079,9 @@ void PCM::initUncorePMUsDirect() ) ); } - } - else if (SPR == cpu_model || EMR == cpu_model) - { + break; + case SPR: + case EMR: uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2064,9 +2102,32 @@ void PCM::initUncorePMUsDirect() ) ); } - } - else if (isServerCPU() && hasPCICFGUncore()) - { + break; + case SRF: + uncorePMUs[s].resize(1); + { + std::vector > CounterControlRegs{ + std::make_shared(handle, BHS_UBOX_MSR_PMON_CTL0_ADDR), + std::make_shared(handle, BHS_UBOX_MSR_PMON_CTL1_ADDR) + }, + CounterValueRegs{ + std::make_shared(handle, BHS_UBOX_MSR_PMON_CTR0_ADDR), + std::make_shared(handle, BHS_UBOX_MSR_PMON_CTR1_ADDR), + }; + uncorePMUs[s][0][UBOX_PMU_ID].push_back( + std::make_shared( + std::make_shared(handle, BHS_UBOX_MSR_PMON_BOX_CTL_ADDR), + CounterControlRegs, + CounterValueRegs, + std::make_shared(handle, BHS_UCLK_FIXED_CTL_ADDR), + std::make_shared(handle, BHS_UCLK_FIXED_CTR_ADDR) + ) + ); + } + break; + default: + if (isServerCPU() && hasPCICFGUncore()) + { uncorePMUs[s].resize(1); { std::vector > CounterControlRegs{ @@ -2087,6 +2148,7 @@ void PCM::initUncorePMUsDirect() ) ); } + } } auto addPMUsFromDiscoveryRef = [this, &handle, &s](std::vector& out, const unsigned int pmuType, const int filter0 = -1) @@ -2167,6 +2229,7 @@ void PCM::initUncorePMUsDirect() break; case SPR: case EMR: + case SRF: uncorePMUs[s].resize(1); addPMUsFromDiscoveryRef(uncorePMUs[s][0][PCU_PMU_ID], SPR_PCU_BOX_TYPE, 0xE); if (uncorePMUs[s][0][PCU_PMU_ID].empty()) @@ -2177,18 +2240,83 @@ void PCM::initUncorePMUsDirect() } // add MDF PMUs - switch (cpu_model) + auto addMDFPMUs = [&](const unsigned int boxType) { - case SPR: - case EMR: 
uncorePMUs[s].resize(1); - addPMUsFromDiscoveryRef(uncorePMUs[s][0][MDF_PMU_ID], SPR_MDF_BOX_TYPE); + addPMUsFromDiscoveryRef(uncorePMUs[s][0][MDF_PMU_ID], boxType); if (uncorePMUs[s][0][MDF_PMU_ID].empty()) { std::cerr << "ERROR: MDF PMU not found\n"; } + }; + switch (cpu_model) + { + case SPR: + case EMR: + addMDFPMUs(SPR_MDF_BOX_TYPE); + break; + case SRF: + addMDFPMUs(BHS_MDF_BOX_TYPE); break; } + + auto addPCICFGPMUsFromDiscoveryRef = [this, &s](std::vector& out, const unsigned int BoxType) + { + getPCICFGPMUsFromDiscovery(BoxType, s, [&out](const UncorePMU& pmu) { + out.push_back(std::make_shared(pmu)); + }); + }; + + auto addPCICFGPMUsFallback = [&s](std::vector& out, const std::vector & DIDs, const char * info = nullptr) + { + if (s == 0) + { + if (info) + { +#ifndef PCM_SILENT + std::cerr << info; +#endif + } + forAllIntelDevices([&DIDs, &out](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 device_id) + { + for (const auto & did: DIDs) + { + if (device_id == did) + { + auto handle = std::make_shared(group, bus, device, function); + const size_t n_regs = 4; + std::vector > CounterControlRegs, CounterValueRegs; + for (size_t r = 0; r < n_regs; ++r) + { + CounterControlRegs.push_back(std::make_shared(handle, BHS_PCIE_GEN5_PCI_PMON_CTL0_ADDR + sizeof(uint64)*r)); + CounterValueRegs.push_back(std::make_shared(handle, BHS_PCIE_GEN5_PCI_PMON_CTR0_ADDR + sizeof(uint64)*r)); + } + auto boxCtlRegister = std::make_shared(handle, BHS_PCIE_GEN5_PCI_PMON_BOX_CTL_ADDR); + out.push_back(std::make_shared(boxCtlRegister, CounterControlRegs, CounterValueRegs)); + } + } + }); + } + }; + + switch (cpu_model) + { + case SRF: + uncorePMUs[s].resize(1); + if (safe_getenv("PCM_NO_PCIE_GEN5_DISCOVERY") == std::string("1")) + { + addPCICFGPMUsFallback(uncorePMUs[s][0][PCIE_GEN5x16_PMU_ID], { 0x0DB0, 0x0DB1, 0x0DB2, 0x0DB3 }, + "Info: PCM_NO_PCIE_GEN5_DISCOVERY=1 is set, detecting PCIE_GEN5 x16 PMUs manually and mapping them to socket 
0.\n"); + addPCICFGPMUsFallback(uncorePMUs[s][0][PCIE_GEN5x8_PMU_ID], { 0x0DB6, 0x0DB7, 0x0DB8, 0x0DB9 }, + "Info: PCM_NO_PCIE_GEN5_DISCOVERY=1 is set, detecting PCIE_GEN5 x8 PMUs manually and mapping them to socket 0.\n"); + } + else + { + addPCICFGPMUsFromDiscoveryRef(uncorePMUs[s][0][PCIE_GEN5x16_PMU_ID], BHS_PCIE_GEN5x16_TYPE); + addPCICFGPMUsFromDiscoveryRef(uncorePMUs[s][0][PCIE_GEN5x8_PMU_ID], BHS_PCIE_GEN5x8_TYPE); + } + break; + } } // init IIO addresses @@ -2277,8 +2405,27 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::SRF: + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + auto & handle = MSR[socketRefCore[s]]; + for (int unit = 0; unit < BHS_M2IOSF_NUM; ++unit) + { + iioPMUs[s][unit] = UncorePMU( + std::make_shared(handle, BHS_M2IOSF_IIO_UNIT_CTL + BHS_M2IOSF_REG_STEP * unit), + std::make_shared(handle, BHS_M2IOSF_IIO_CTL0 + BHS_M2IOSF_REG_STEP * unit + 0), + std::make_shared(handle, BHS_M2IOSF_IIO_CTL0 + BHS_M2IOSF_REG_STEP * unit + 1), + std::make_shared(handle, BHS_M2IOSF_IIO_CTL0 + BHS_M2IOSF_REG_STEP * unit + 2), + std::make_shared(handle, BHS_M2IOSF_IIO_CTL0 + BHS_M2IOSF_REG_STEP * unit + 3), + std::make_shared(handle, BHS_M2IOSF_IIO_CTR0 + BHS_M2IOSF_REG_STEP * unit + 0), + std::make_shared(handle, BHS_M2IOSF_IIO_CTR0 + BHS_M2IOSF_REG_STEP * unit + 1), + std::make_shared(handle, BHS_M2IOSF_IIO_CTR0 + BHS_M2IOSF_REG_STEP * unit + 2), + std::make_shared(handle, BHS_M2IOSF_IIO_CTR0 + BHS_M2IOSF_REG_STEP * unit + 3) + ); + } + } + break; } - //init the IDX accelerator auto createIDXPMU = [](const size_t addr, const size_t mapSize, const size_t numaNode, const size_t socketId) -> IDX_PMU { @@ -2358,7 +2505,7 @@ void PCM::initUncorePMUsDirect() { static const uint32 IAA_DEV_IDS[] = { 0x0CFE }; static const uint32 DSA_DEV_IDS[] = { 0x0B25 }; - static const uint32 QAT_DEV_IDS[] = { 0x4940, 0x4942 }; + static const uint32 QAT_DEV_IDS[] = { 0x4940, 0x4942, 0x4944 }; std::vector > socket2IAAbus; std::vector > socket2DSAbus; std::vector > 
socket2QATbus; @@ -2467,6 +2614,12 @@ void PCM::initUncorePMUsDirect() IRP_CTR_REG_OFFSET = SPR_IRP_CTR_REG_OFFSET; IRP_UNIT_CTL = SPR_IRP_UNIT_CTL; break; + case SRF: + irpStacks = BHS_M2IOSF_NUM; + IRP_CTL_REG_OFFSET = BHS_IRP_CTL_REG_OFFSET; + IRP_CTR_REG_OFFSET = BHS_IRP_CTR_REG_OFFSET; + IRP_UNIT_CTL = BHS_IRP_UNIT_CTL; + break; } irpPMUs.resize(num_sockets); if (IRP_UNIT_CTL) @@ -2590,14 +2743,21 @@ void PCM::initUncorePMUsDirect() } return UncorePMU(std::make_shared(handle, unitControlAddr - unitControlAddrAligned), CounterControlRegs, CounterValueRegs); }; - if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) + + switch (getCPUModel()) { - const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), - uncorePMUDiscovery->getNumBoxes(SPR_CXLDP_BOX_TYPE, s)); - for (size_t pos = 0; pos < n_units; ++pos) + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: { - cxlPMUs[s].push_back(std::make_pair(createCXLPMU(s, SPR_CXLCM_BOX_TYPE, pos), createCXLPMU(s, SPR_CXLDP_BOX_TYPE, pos))); + const auto n_units = (std::min)(uncorePMUDiscovery->getNumBoxes(SPR_CXLCM_BOX_TYPE, s), + uncorePMUDiscovery->getNumBoxes(SPR_CXLDP_BOX_TYPE, s)); + for (size_t pos = 0; pos < n_units; ++pos) + { + cxlPMUs[s].push_back(std::make_pair(createCXLPMU(s, SPR_CXLCM_BOX_TYPE, pos), createCXLPMU(s, SPR_CXLDP_BOX_TYPE, pos))); + } } + break; } } } @@ -3062,6 +3222,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == ICX || model_ == SPR || model_ == EMR + || model_ == SRF ); } @@ -3349,6 +3510,21 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter L3CacheHitsAvailable = true; core_gen_counter_num_used = 4; break; + case SRF: + LLCArchEventInit(coreEventDesc); + coreEventDesc[2].event_number = CMT_MEM_LOAD_RETIRED_L2_MISS_EVTNR; + coreEventDesc[2].umask_value = CMT_MEM_LOAD_RETIRED_L2_MISS_UMASK; + coreEventDesc[3].event_number = CMT_MEM_LOAD_RETIRED_L2_HIT_EVTNR; + coreEventDesc[3].umask_value = 
CMT_MEM_LOAD_RETIRED_L2_HIT_UMASK; + L2CacheHitRatioAvailable = true; + L3CacheHitRatioAvailable = true; + L3CacheMissesAvailable = true; + L2CacheMissesAvailable = true; + L2CacheHitsAvailable = true; + L3CacheHitsSnoopAvailable = true; + L3CacheHitsAvailable = true; + core_gen_counter_num_used = 4; + break; PCM_SKL_PATH_CASES case SKX: case ICX: @@ -4650,6 +4826,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Sapphire Rapids-SP"; case EMR: return "Emerald Rapids-SP"; + case SRF: + return "Sierra Forest"; } return "unknown"; } @@ -5366,6 +5544,7 @@ PCM::ErrorCode PCM::programServerUncoreMemoryMetrics(const ServerUncoreMemoryMet serverUncorePMUs[i]->programServerUncoreMemoryMetrics(metrics, rankA, rankB); } programCXLCM(); + programCXLDP(); return PCM::Success; } @@ -5817,7 +5996,10 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile programUncorePMUs(pmu_id, [&events64, &events, &pmu_id](UncorePMU& pmu) { uint64 * eventsIter = (uint64 *)events64; - pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + if (pmu_id != PCIE_GEN5x16_PMU_ID && pmu_id != PCIE_GEN5x8_PMU_ID) + { + pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); + } PCM::program(pmu, eventsIter, eventsIter + (std::min)(events.programmable.size(), (size_t)ServerUncoreCounterState::maxCounters), UNC_PMON_UNIT_CTL_FRZ_EN); }); } @@ -6024,6 +6206,21 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType result.UncClocks += getUncoreClocks(socket); } + if (socket < UFSStatus.size()) + { + result.UFSStatus.clear(); + for (size_t die = 0; die < UFSStatus[socket].size(); ++die) + { + auto & handle = UFSStatus[socket][die]; + if (handle.get() && die < handle->getNumEntries()) + { + const auto value = handle->read64(die); + // std::cerr << "DEBUG: " << std::hex << value << std::dec << " "; + result.UFSStatus.push_back(value); + } + } + } + const bool ReadMCStatsFromServerBW = (socket < serverBW.size()); if (ReadMCStatsFromServerBW) { @@ 
-6045,6 +6242,11 @@ void PCM::readAndAggregateUncoreMCCounters(const uint32 socket, CounterStateType { result.UncMCNormalReads += serverUncorePMUs[socket]->getImcReads(); result.UncMCFullWrites += serverUncorePMUs[socket]->getImcWrites(); + if(nearMemoryMetricsAvailable()){ + result.UncNMHit += serverUncorePMUs[socket]->getNMHits(); + result.UncNMMiss += serverUncorePMUs[socket]->getNMMisses(); + } + } if (localMemoryRequestRatioMetricAvailable()) { @@ -7018,11 +7220,11 @@ ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) : std::cerr << "Socket " << socket_ << ": " << getNumMC() << " memory controllers detected with total number of " << getNumMCChannels() << " channels. " << getNumQPIPorts() << " " << pcm->xPI() << " ports detected." << - " " << m2mPMUs.size() << " M2M (mesh to memory) blocks detected." + " " << m2mPMUs.size() << " M2M (mesh to memory)/B2CMI blocks detected." " " << hbm_m2mPMUs.size() << " HBM M2M blocks detected." " " << edcPMUs.size() << " EDC/HBM channels detected." " " << haPMUs.size() << " Home Agents detected." - " " << m3upiPMUs.size() << " M3UPI blocks detected." + " " << m3upiPMUs.size() << " M3UPI/B2UPI blocks detected." 
"\n"; } @@ -7204,6 +7406,39 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_EDC_INIT(6, ECLK, KNL) PCM_PCICFG_EDC_INIT(7, ECLK, KNL) } + else if ( + cpu_model == PCM::SRF + ) + { + PCM_PCICFG_QPI_INIT(0, BHS); + PCM_PCICFG_QPI_INIT(1, BHS); + PCM_PCICFG_QPI_INIT(2, BHS); + PCM_PCICFG_QPI_INIT(3, BHS); + PCM_PCICFG_QPI_INIT(4, BHS); + PCM_PCICFG_QPI_INIT(5, BHS); + + // B2CMI (M2M) + PCM_PCICFG_M2M_INIT(0, BHS) + PCM_PCICFG_M2M_INIT(1, BHS) + PCM_PCICFG_M2M_INIT(2, BHS) + PCM_PCICFG_M2M_INIT(3, BHS) + PCM_PCICFG_M2M_INIT(4, BHS) + PCM_PCICFG_M2M_INIT(5, BHS) + PCM_PCICFG_M2M_INIT(6, BHS) + PCM_PCICFG_M2M_INIT(7, BHS) + PCM_PCICFG_M2M_INIT(8, BHS) + PCM_PCICFG_M2M_INIT(9, BHS) + PCM_PCICFG_M2M_INIT(10, BHS) + PCM_PCICFG_M2M_INIT(11, BHS) + + // B2UPI (M3UPI) + PCM_PCICFG_M3UPI_INIT(0, BHS); + PCM_PCICFG_M3UPI_INIT(1, BHS); + PCM_PCICFG_M3UPI_INIT(2, BHS); + PCM_PCICFG_M3UPI_INIT(3, BHS); + PCM_PCICFG_M3UPI_INIT(4, BHS); + PCM_PCICFG_M3UPI_INIT(5, BHS); + } else if (cpu_model == PCM::SNOWRIDGE) { PCM_PCICFG_M2M_INIT(0, SERVER) @@ -7394,8 +7629,13 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : m2mHandles) { - if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR) + switch (cpu_model) { + case PCM::ICX: + case PCM::SNOWRIDGE: + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: m2mPMUs.push_back( UncorePMU( std::make_shared(handle, SERVER_M2M_PCI_PMON_BOX_CTL_ADDR), @@ -7409,9 +7649,8 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) std::make_shared(handle, SERVER_M2M_PCI_PMON_CTR3_ADDR) ) ); - } - else - { + break; + default: m2mPMUs.push_back( UncorePMU( std::make_shared(handle, SKX_M2M_PCI_PMON_BOX_CTL_ADDR), @@ -7564,6 +7803,27 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) } } + auto initBHSiMCPMUs = [&](const size_t numChannelsParam) + { + numChannels = (std::min)(numChannelsParam, m2mPMUs.size()); + 
if (initAndCheckSocket2Ubox0Bus()) + { + auto memBar = getServerSCFBar(socket2UBOX0bus[socket_].first, socket2UBOX0bus[socket_].second); + for (int channel = 0; channel < numChannels; ++channel) + { + imcPMUs.push_back(createIMCPMU(memBar + BHS_MC_CH_PMON_BASE_ADDR + channel * SERVER_MC_CH_PMON_STEP, SERVER_MC_CH_PMON_SIZE)); + num_imc_channels.push_back(1); + } + } + }; + + switch (cpu_model) + { + case PCM::SRF: + initBHSiMCPMUs(12); + break; + } + if (imcPMUs.empty()) { std::cerr << "PCM error: no memory controllers found.\n"; @@ -7645,6 +7905,21 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) ) ); break; + case PCM::SRF: + m3upiPMUs.push_back( + UncorePMU( + std::make_shared(handle, BHS_M3UPI_PCI_PMON_BOX_CTL_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTL0_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTL1_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTL2_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTL3_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTR0_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTR1_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTR2_ADDR), + std::make_shared(handle, BHS_M3UPI_PCI_PMON_CTR3_ADDR) + ) + ); + break; default: m3upiPMUs.push_back( @@ -7760,8 +8035,9 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) if (xpiPMUs.empty()) for (auto & handle : qpiLLHandles) { - if (cpu_model == PCM::SKX) + switch (cpu_model) { + case PCM::SKX: xpiPMUs.push_back( UncorePMU( std::make_shared(handle, U_L_PCI_PMON_BOX_CTL_ADDR), @@ -7775,9 +8051,8 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) std::make_shared(handle, U_L_PCI_PMON_CTR3_ADDR) ) ); - } - else if (cpu_model == PCM::ICX) - { + break; + case PCM::ICX: xpiPMUs.push_back( UncorePMU( std::make_shared(handle, ICX_UPI_PCI_PMON_BOX_CTL_ADDR), @@ -7791,9 +8066,10 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) std::make_shared(handle, 
ICX_UPI_PCI_PMON_CTR3_ADDR) ) ); - } - else if (cpu_model == PCM::SPR || cpu_model == PCM::EMR) - { + break; + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: xpiPMUs.push_back( UncorePMU( std::make_shared(handle, SPR_UPI_PCI_PMON_BOX_CTL_ADDR), @@ -7807,9 +8083,8 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) std::make_shared(handle, SPR_UPI_PCI_PMON_CTR0_ADDR + 8*3) ) ); - } - else - { + break; + default: xpiPMUs.push_back( UncorePMU( std::make_shared(handle, Q_P_PCI_PMON_BOX_CTL_ADDR), @@ -8264,8 +8539,8 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory case PmemMemoryMode: case PmemMixedMode: if (noPmem()) return false; - MCCntConfig[EventPosition::PMM_MM_MISS_CLEAN] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor TAGCHK.MISS_CLEAN on counter 2 - MCCntConfig[EventPosition::PMM_MM_MISS_DIRTY] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(4); // monitor TAGCHK.MISS_DIRTY on counter 3 + MCCntConfig[EventPosition::MM_MISS_CLEAN] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(2); // monitor TAGCHK.MISS_CLEAN on counter 2 + MCCntConfig[EventPosition::MM_MISS_DIRTY] = MC_CH_PCI_PMON_CTL_EVENT(0xd3) + MC_CH_PCI_PMON_CTL_UMASK(4); // monitor TAGCHK.MISS_DIRTY on counter 3 break; case Pmem: if (noPmem()) return false; @@ -8316,6 +8591,20 @@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory return; } break; + case PCM::SRF: + if (metrics == PmemMemoryMode) + { + std::cerr << "PCM Error: PMM/Pmem metrics are not available on your platform\n"; + return; + } + else + { + MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD + MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR + MCCntConfig[EventPosition::READ2] = MC_CH_PCI_PMON_CTL_EVENT(0x06) 
+ MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 2: CAS_COUNT_SCH1.RD + MCCntConfig[EventPosition::WRITE2] = MC_CH_PCI_PMON_CTL_EVENT(0x06) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 3: CAS_COUNT_SCH1.WR + } + break; default: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3); // monitor reads on counter 0: CAS_COUNT.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR @@ -8385,6 +8674,12 @@ void ServerUncorePMUs::program() EDCCntConfig[EventPosition::READ] = MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT.RD EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; + case PCM::SRF: + MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD + MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR + MCCntConfig[EventPosition::READ2] = MC_CH_PCI_PMON_CTL_EVENT(0x06) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 2: CAS_COUNT_SCH1.RD + MCCntConfig[EventPosition::WRITE2] = MC_CH_PCI_PMON_CTL_EVENT(0x06) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 3: CAS_COUNT_SCH1.WR + break; default: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3); // monitor reads on counter 0: CAS_COUNT.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); // monitor writes on counter 1: CAS_COUNT.WR @@ -8508,6 +8803,12 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 
endC for (uint32 i = beginChannel; i < endChannel && i < imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::READ); + switch (cpu_model) + { + case PCM::SRF: + result += getMCCounter(i, EventPosition::READ2); + break; + } } return result; } @@ -8518,11 +8819,41 @@ uint64 ServerUncorePMUs::getImcWrites() for (uint32 i = 0; i < (uint32)imcPMUs.size(); ++i) { result += getMCCounter(i, EventPosition::WRITE); + switch (cpu_model) + { + case PCM::SRF: + result += getMCCounter(i, EventPosition::WRITE2); + break; + } + } + + return result; +} + + +uint64 ServerUncorePMUs::getNMHits() +{ + uint64 result = 0; + for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i) + { + result += getM2MCounter(i, EventPosition::NM_HIT); } return result; } +uint64 ServerUncorePMUs::getNMMisses() +{ + uint64 result = 0; + for (uint32 i = 0; i < (uint32)m2mPMUs.size(); ++i) + { + result += getM2MCounter(i, EventPosition::MM_MISS_CLEAN) + getM2MCounter(i, EventPosition::MM_MISS_DIRTY); + } + + return result; +} + + uint64 ServerUncorePMUs::getPMMReads() { uint64 result = 0; @@ -8712,6 +9043,12 @@ void ServerUncorePMUs::programM2M() cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x20) + UNC_PMON_CTL_UMASK_EXT(0x07); // UNC_M2M_IMC_READS.TO_PMM cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x80) + UNC_PMON_CTL_UMASK_EXT(0x1C); // UNC_M2M_IMC_WRITES.TO_PMM break; + case PCM::SRF: + cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x1F) + M2M_PCI_PMON_CTL_UMASK(0x0F); // UNC_B2CMI_TAG_HIT.ALL + cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS + cfg[EventPosition::MM_MISS_CLEAN] = M2M_PCI_PMON_CTL_EVENT(0x4B) + M2M_PCI_PMON_CTL_UMASK(0x05); // UNC_B2CMI_TAG_MISS.CLEAN + cfg[EventPosition::MM_MISS_DIRTY] = M2M_PCI_PMON_CTL_EVENT(0x4B) + M2M_PCI_PMON_CTL_UMASK(0x0A); // UNC_B2CMI_TAG_MISS.DIRTY + break; default: cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3); // 
UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY) cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS @@ -9057,6 +9394,17 @@ uint64 ServerUncorePMUs::computeQPISpeed(const uint32 core_nr, const int cpumode std::pair regBits{}; switch (cpumodel) { + case PCM::SRF: + UPISpeedMap = { + { 0, 2500}, + { 1, 12800}, + { 2, 14400}, + { 3, 16000}, + { 8, 20000}, + { 9, 24000} + }; + regBits = std::make_pair(5, 8); + break; case PCM::SPR: UPISpeedMap = { {0, 2500}, @@ -9152,111 +9500,136 @@ void ServerUncorePMUs::reportQPISpeed() const uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const { - if(JAKETOWN == cpu_model || IVYTOWN == cpu_model) - { - return JKT_C0_MSR_PMON_CTR0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctr; + switch (cpu_model) { + case JAKETOWN: + case IVYTOWN: + return JKT_C0_MSR_PMON_CTR0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctr; - } else if(HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) - { - return HSX_C0_MSR_PMON_CTR0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctr; - } - else if (ICX == cpu_model || SNOWRIDGE == cpu_model) - { + case HASWELLX: + case BDX_DE: + case BDX: + case SKX: + return HSX_C0_MSR_PMON_CTR0 + (HSX_CBO_MSR_STEP * Cbo) + Ctr; + + case ICX: + case SNOWRIDGE: return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTR0_OFFSET + Ctr; - } - else if (SPR == cpu_model || EMR == cpu_model) - { + + case SPR: + case EMR: + case SRF: return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; + + default: + return 0; } - return 0; } uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const { - if(JAKETOWN == cpu_model || IVYTOWN == cpu_model) - { - return JKT_C0_MSR_PMON_BOX_FILTER + ((JKTIVT_CBO_MSR_STEP)*Cbo); + switch (cpu_model) { + case JAKETOWN: + case IVYTOWN: + return JKT_C0_MSR_PMON_BOX_FILTER + (JKTIVT_CBO_MSR_STEP * Cbo); - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) - { - return HSX_C0_MSR_PMON_BOX_FILTER + ((HSX_CBO_MSR_STEP)*Cbo); - } else if (KNL == cpu_model) - { - 
return KNL_CHA0_MSR_PMON_BOX_CTL + ((KNL_CHA_MSR_STEP)*Cbo); - } - else if (ICX == cpu_model) - { + case HASWELLX: + case BDX_DE: + case BDX: + case SKX: + return HSX_C0_MSR_PMON_BOX_FILTER + (HSX_CBO_MSR_STEP * Cbo); + + case KNL: + return KNL_CHA0_MSR_PMON_BOX_CTL + (KNL_CHA_MSR_STEP * Cbo); + + case ICX: return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_BOX_FILTER_OFFSET; - } - else if (SPR == cpu_model || EMR == cpu_model) - { + + case SPR: + case EMR: + case SRF: return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; + + default: + return 0; } - return 0; } uint64 PCM::CX_MSR_PMON_BOX_FILTER1(uint32 Cbo) const { - if(IVYTOWN == cpu_model) - { - return IVT_C0_MSR_PMON_BOX_FILTER1 + ((JKTIVT_CBO_MSR_STEP)*Cbo); + switch (cpu_model) { + case IVYTOWN: + return IVT_C0_MSR_PMON_BOX_FILTER1 + (JKTIVT_CBO_MSR_STEP * Cbo); - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) - { - return HSX_C0_MSR_PMON_BOX_FILTER1 + ((HSX_CBO_MSR_STEP)*Cbo); + case HASWELLX: + case BDX_DE: + case BDX: + case SKX: + return HSX_C0_MSR_PMON_BOX_FILTER1 + (HSX_CBO_MSR_STEP * Cbo); + + default: + return 0; } - return 0; } - uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const { - if(JAKETOWN == cpu_model || IVYTOWN == cpu_model) - { - return JKT_C0_MSR_PMON_CTL0 + ((JKTIVT_CBO_MSR_STEP)*Cbo) + Ctl; + switch (cpu_model) { + case JAKETOWN: + case IVYTOWN: + return JKT_C0_MSR_PMON_CTL0 + (JKTIVT_CBO_MSR_STEP * Cbo) + Ctl; - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) - { - return HSX_C0_MSR_PMON_CTL0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctl; - } - else if (ICX == cpu_model || SNOWRIDGE == cpu_model) - { + case HASWELLX: + case BDX_DE: + case BDX: + case SKX: + return HSX_C0_MSR_PMON_CTL0 + (HSX_CBO_MSR_STEP * Cbo) + Ctl; + + case ICX: + case SNOWRIDGE: return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTL0_OFFSET + Ctl; - } - else if (SPR == cpu_model || EMR == cpu_model) - { + + 
case SPR: + case EMR: + case SRF: return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; + + default: + return 0; } - return 0; } uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const { - if(JAKETOWN == cpu_model || IVYTOWN == cpu_model) - { - return JKT_C0_MSR_PMON_BOX_CTL + ((JKTIVT_CBO_MSR_STEP)*Cbo); + switch (cpu_model) { + case JAKETOWN: + case IVYTOWN: + return JKT_C0_MSR_PMON_BOX_CTL + (JKTIVT_CBO_MSR_STEP * Cbo); - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) - { - return HSX_C0_MSR_PMON_BOX_CTL + ((HSX_CBO_MSR_STEP)*Cbo); - } else if (KNL == cpu_model) - { - return KNL_CHA0_MSR_PMON_BOX_CTRL + ((KNL_CHA_MSR_STEP)*Cbo); - } - else if (ICX == cpu_model) - { + case HASWELLX: + case BDX_DE: + case BDX: + case SKX: + return HSX_C0_MSR_PMON_BOX_CTL + (HSX_CBO_MSR_STEP * Cbo); + + case KNL: + return KNL_CHA0_MSR_PMON_BOX_CTRL + (KNL_CHA_MSR_STEP * Cbo); + + case ICX: return ICX_CHA_MSR_PMON_BOX_CTL[Cbo]; - } - else if (SPR == cpu_model || EMR == cpu_model) - { + + case SPR: + case EMR: + case SRF: return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; - } - else if (SNOWRIDGE == cpu_model) - { + + case SNOWRIDGE: return SNR_CHA_MSR_PMON_BOX_CTL[Cbo]; + + default: + return 0; } - return 0; } + // Return the first device found with specific vendor/device IDs PciHandleType * getDeviceHandle(uint32 vendorId, uint32 deviceId) { @@ -9320,6 +9693,13 @@ uint32 PCM::getMaxNumOfCBoxesInternal() const uint64 val = 0; switch (cpu_model) { + case SRF: + { + const auto MSR_PMON_NUMBER_CBOS = 0x3fed; + MSR[refCore]->read(MSR_PMON_NUMBER_CBOS, &val); + num = (uint32)(val & 511); + } + break; case SPR: case EMR: try { @@ -9427,6 +9807,9 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) int stacks_count; switch (getCPUModel()) { + case PCM::SRF: + stacks_count = BHS_M2IOSF_NUM; + break; case PCM::SPR: case PCM::EMR: stacks_count = SPR_M2IOSF_NUM; @@ -9518,6 +9901,7 @@ void 
PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { + case PCM::SRF: case PCM::SPR: case PCM::EMR: case PCM::ICX: @@ -9564,8 +9948,15 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc { pmu.initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if (ICX != cpu_model && SNOWRIDGE != cpu_model && SPR != cpu_model && EMR != cpu_model) + if ( ICX != cpu_model + && SNOWRIDGE != cpu_model + && SPR != cpu_model + && EMR != cpu_model + && SRF != cpu_model + ) + { programCboOpcodeFilter(opCode, pmu, nc_, 0, loc, rem); + } if ((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) *pmu.filter[0] = llc_lookup_tid_filter; @@ -9678,11 +10069,24 @@ void PCM::programCXLCM() { uint64 CXLCMevents[8] = { 0,0,0,0,0,0,0,0 }; + CXLCMevents[EventPosition::CXL_RxC_MEM] = UNC_PMON_CTL_EVENT(0x41) + UNC_PMON_CTL_UMASK(0x10); // CXLCM_RxC_PACK_BUF_INSERTS.MEM_DATA CXLCMevents[EventPosition::CXL_TxC_MEM] = UNC_PMON_CTL_EVENT(0x02) + UNC_PMON_CTL_UMASK(0x10); // CXLCM_TxC_PACK_BUF_INSERTS.MEM_DATA + CXLCMevents[EventPosition::CXL_RxC_CACHE] = UNC_PMON_CTL_EVENT(0x41) + UNC_PMON_CTL_UMASK(0x04);// CXLCM_RxC_PACK_BUF_INSERTS.CACHE_DATA CXLCMevents[EventPosition::CXL_TxC_CACHE] = UNC_PMON_CTL_EVENT(0x02) + UNC_PMON_CTL_UMASK(0x04);// CXLCM_TxC_PACK_BUF_INSERTS.CACHE_DATA programCXLCM(CXLCMevents); } + + +void PCM::programCXLDP() +{ + uint64 events[4] = { 0,0,0,0 }; + + events[EventPosition::CXL_TxC_MEM] = UNC_PMON_CTL_EVENT(0x02) + UNC_PMON_CTL_UMASK(0x20); // UNC_CXLDP_TxC_AGF_INSERTS.M2S_DATA + + programCXLDP(events); +} + void PCM::programIDXAccelCounters(uint32 accel, std::vector &events, std::vector &filters_wq, std::vector &filters_eng, std::vector &filters_tc, std::vector &filters_pgsz, std::vector &filters_xfersz) { uint32 maxCTR = getMaxNumOfIDXAccelCtrs(accel); //limit the number of physical counter to use @@ -9811,6 +10215,7 @@ bool PCM::supportIDXAccelDev() const { case PCM::SPR: 
case PCM::EMR: + case PCM::SRF: retval = true; break; @@ -10041,12 +10446,30 @@ void UncorePMU::cleanup() void UncorePMU::freeze(const uint32 extra) { - *unitControl = (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) ? SPR_UNC_PMON_UNIT_CTL_FRZ : (extra + UNC_PMON_UNIT_CTL_FRZ); + switch (getCPUModel()) + { + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: + *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; + break; + default: + *unitControl = extra + UNC_PMON_UNIT_CTL_FRZ; + } } void UncorePMU::unfreeze(const uint32 extra) { - *unitControl = (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) ? 0 : extra; + switch (getCPUModel()) + { + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: + *unitControl = 0; + break; + default: + *unitControl = extra; + } } bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) @@ -10055,11 +10478,15 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) { return true; // this PMU does not have unit control register => no op } - if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) + + switch (getCPUModel()) { - *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze - *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers - return true; + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: + *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze + *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers + return true; } // freeze enable *unitControl = extra; @@ -10090,8 +10517,11 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) void UncorePMU::resetUnfreeze(const uint32 extra) { - if (getCPUModel() == PCM::SPR || getCPUModel() == PCM::EMR) + switch (getCPUModel()) { + case PCM::SPR: + case PCM::EMR: + case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // 
unfreeze return; diff --git a/src/cpucounters.h b/src/cpucounters.h index 599965b2..47bef3e4 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -27,6 +27,7 @@ #include "topologyentry.h" #include "msr.h" #include "pci.h" +#include "tpmi.h" #include "bw.h" #include "width_extender.h" #include "exceptions/unsupported_processor_exception.hpp" @@ -412,6 +413,8 @@ class ServerUncorePMUs enum EventPosition { READ=0, WRITE=1, + READ2=2, + WRITE2=3, READ_RANK_A=0, WRITE_RANK_A=1, READ_RANK_B=2, @@ -419,8 +422,8 @@ class ServerUncorePMUs PARTIAL=2, PMM_READ=2, PMM_WRITE=3, - PMM_MM_MISS_CLEAN=2, - PMM_MM_MISS_DIRTY=3, + MM_MISS_CLEAN=2, + MM_MISS_DIRTY=3, NM_HIT=0, // NM : Near Memory (DRAM cache) in Memory Mode M2M_CLOCKTICKS=1 }; @@ -445,7 +448,10 @@ class ServerUncorePMUs uint64 getHALocalRequests(); //! \brief Get the number of local requests to home agent (BDX/HSX only) uint64 getHARequests(); - + //! \brief Get the number of Near Memory Hits + uint64 getNMHits(); + //! \brief Get the number of Near Memory Misses + uint64 getNMMisses(); //! \brief Get the number of PMM memory reads (in cache lines) uint64 getPMMReads(); //! \brief Get the number of PMM memory writes (in cache lines) @@ -632,6 +638,13 @@ class PCM_API PCM uint64 max_qpi_speed; // in GBytes/second uint32 L3ScalingFactor; int32 pkgThermalSpecPower, pkgMinimumPower, pkgMaximumPower; + enum UFS_TPMI + { + UFS_ID = 2, + UFS_FABRIC_CLUSTER_OFFSET = 1, + UFS_STATUS = 0 + }; + std::vector > > UFSStatus; std::vector topology; SystemRoot* systemTopology; @@ -650,10 +663,14 @@ class PCM_API PCM MDF_PMU_ID, PCU_PMU_ID, UBOX_PMU_ID, + PCIE_GEN5x16_PMU_ID, + PCIE_GEN5x8_PMU_ID, INVALID_PMU_ID }; private: std::unordered_map strToUncorePMUID_ { + {"pciex8", PCIE_GEN5x8_PMU_ID}, + {"pciex16", PCIE_GEN5x16_PMU_ID} }; public: UncorePMUIDs strToUncorePMUID(const std::string & type) const @@ -661,6 +678,12 @@ class PCM_API PCM const auto iter = strToUncorePMUID_.find(type); return (iter == strToUncorePMUID_.end()) ? 
INVALID_PMU_ID : (UncorePMUIDs)iter->second; } + size_t getNumUFSDies() const + { + if (UFSStatus.empty()) return 0; + + return UFSStatus[0].size(); + } private: typedef std::unordered_map UncorePMUMapType; // socket -> die -> pmu map -> pmu ref array @@ -1183,6 +1206,7 @@ class PCM_API PCM public: struct RawPMUConfig; void programCXLCM(); + void programCXLDP(); template void readAndAggregateCXLCMCounters(CounterStateType & counterState); @@ -1218,12 +1242,14 @@ class PCM_API PCM auto ctrl = pmu.counterControl[c]; if (ctrl.get() != nullptr) { - if (PCM::SPR == cpu_model || PCM::EMR == cpu_model) + switch (cpu_model) { + case SPR: + case EMR: + case SRF: *ctrl = *curEvent; - } - else - { + break; + default: *ctrl = MC_CH_PCI_PMON_CTL_EN; *ctrl = MC_CH_PCI_PMON_CTL_EN | *curEvent; } @@ -1286,7 +1312,9 @@ class PCM_API PCM REQUESTS_ALL = 2, REQUESTS_LOCAL = 3, CXL_TxC_MEM = 0, // works only on counters 0-3 - CXL_TxC_CACHE = 1 // works only on counters 0-3 + CXL_TxC_CACHE = 1, // works only on counters 0-3 + CXL_RxC_MEM = 4, // works only on counters 4-7 + CXL_RxC_CACHE = 5 // works only on counters 4-7 }; //! check if in secure boot mode bool isSecureBoot() const; @@ -1808,6 +1836,7 @@ class PCM_API PCM ICX = 106, SPR = 143, EMR = 207, + SRF = 175, END_OF_MODEL_LIST = 0x0ffff }; @@ -1901,6 +1930,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case SRF: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } return 0; @@ -1926,6 +1956,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case SRF: case BDX: case KNL: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? 
(serverUncorePMUs[0]->getNumMC()) : 0; @@ -1953,6 +1984,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case SRF: case BDX: case KNL: case SNOWRIDGE: @@ -1983,6 +2015,7 @@ class PCM_API PCM case ICX: case SPR: case EMR: + case SRF: case BDX: case KNL: case SNOWRIDGE: @@ -2042,6 +2075,7 @@ class PCM_API PCM return 5; case SPR: case EMR: + case SRF: return 6; } if (isAtom()) @@ -2093,6 +2127,7 @@ class PCM_API PCM case SNOWRIDGE: case SPR: case EMR: + case SRF: case KNL: return true; default: @@ -2354,6 +2389,7 @@ class PCM_API PCM || cpu_model == PCM::MTL || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } @@ -2370,6 +2406,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } @@ -2392,6 +2429,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } @@ -2407,6 +2445,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } @@ -2424,6 +2463,13 @@ class PCM_API PCM return outgoingQPITrafficMetricsAvailable(); } + bool nearMemoryMetricsAvailable() const + { + return ( + cpu_model == PCM::SRF + ); + } + bool memoryTrafficMetricsAvailable() const { return (!(isAtom() || cpu_model == PCM::CLARKDALE)) @@ -2465,7 +2511,11 @@ class PCM_API PCM bool uncoreFrequencyMetricAvailable() const { - return MSR.empty() == false && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL && getNumCores() == getNumOnlineCores(); + return MSR.empty() == false + && getMaxNumOfUncorePMUs(UBOX_PMU_ID) > 0ULL + && getNumCores() == getNumOnlineCores() + && PCM::SRF != cpu_model + ; } bool LatencyMetricsAvailable() const @@ -2556,6 +2606,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF || cpu_model == PCM::BDX || cpu_model == PCM::KNL ); @@ -2575,6 +2626,7 @@ class PCM_API PCM || 
cpu_model_ == PCM::ICX || cpu_model_ == PCM::SPR || cpu_model_ == PCM::EMR + || cpu_model_ == PCM::SRF ); } @@ -2598,6 +2650,7 @@ class PCM_API PCM || cpu_model == PCM::ICX || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } @@ -2685,7 +2738,7 @@ class PCM_API PCM return getBytesPerLinkCycle(cpu_model); } - static double getLinkTransfersPerLinkCycle() + double getLinkTransfersPerLinkCycle() const { return 8.; } @@ -3280,6 +3333,7 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState || PCM::BDX == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model + || PCM::SRF == cpu_model || PCM::KNL == cpu_model ) { /* as described in sections 5.3.2 (DRAM_POWER_INFO) and 5.3.3 (DRAM_ENERGY_STATUS) of @@ -3308,6 +3362,16 @@ class UncoreCounterState friend uint64 getBytesReadFromMC(const CounterStateType & before, const CounterStateType & after); template friend uint64 getBytesWrittenToMC(const CounterStateType & before, const CounterStateType & after); + template + + friend uint64 getNMHits(const CounterStateType & before, const CounterStateType & after); + template + friend uint64 getNMMisses(const CounterStateType & before, const CounterStateType & after); + template + friend double getNMHitRate(const CounterStateType & before, const CounterStateType & after); + template + friend uint64 getNMMissBW(const CounterStateType & before, const CounterStateType & after); + template friend uint64 getBytesReadFromPMM(const CounterStateType & before, const CounterStateType & after); template @@ -3341,13 +3405,49 @@ class UncoreCounterState template friend double getAverageUncoreFrequency(const CounterStateType& before, const CounterStateType& after); template + friend std::vector getUncoreFrequency(const CounterStateType& state); + template + friend std::vector getUncoreDieTypes(const CounterStateType& state); + template friend double getAverageFrequencyFromClocks(const int64 clocks, const CounterStateType& before, const 
CounterStateType& after); +public: + enum DieTypeBits + { + Compute = 1<<23, + LLC = 1<<24, + Memory = 1<<25, + IO = 1<<26 + }; + static std::string getDieTypeStr(const uint64 d) + { + std::string type{}; + if (d & UncoreCounterState::Compute) + { + type += "COR"; + } + if (d & UncoreCounterState::IO) + { + type += "IO"; + } + if (d & UncoreCounterState::LLC) + { + type += "LLC"; + } + if (d & UncoreCounterState::Memory) + { + type += "M"; + } + return type; + } protected: + std::vector UFSStatus; uint64 UncMCFullWrites; uint64 UncMCNormalReads; uint64 UncHARequests; uint64 UncHALocalRequests; + uint64 UncNMMiss; + uint64 UncNMHit; uint64 UncPMMWrites; uint64 UncPMMReads; uint64 UncEDCFullWrites; @@ -3366,10 +3466,13 @@ class UncoreCounterState public: UncoreCounterState() : + UFSStatus{{}}, UncMCFullWrites(0), UncMCNormalReads(0), UncHARequests(0), UncHALocalRequests(0), + UncNMMiss(0), + UncNMHit(0), UncPMMWrites(0), UncPMMReads(0), UncEDCFullWrites(0), @@ -3383,6 +3486,7 @@ class UncoreCounterState TORInsertsIAMiss(0), UncClocks(0) { + UFSStatus.clear(); std::fill(CStateResidency, CStateResidency + PCM::MAX_C_STATE + 1, 0); std::fill(PPEnergyStatus, PPEnergyStatus + PCM::MAX_PP + 1, 0); } @@ -3423,7 +3527,7 @@ class ServerUncoreCounterState : public UncoreCounterState { public: enum { - maxControllers = 4, + maxControllers = 32, maxChannels = 32, maxXPILinks = 6, maxIIOStacks = 16, @@ -3743,6 +3847,29 @@ double getIPC(const CounterStateType & before, const CounterStateType & after) / return -1; } +// \brief Returns current uncore frequency vector +template +std::vector getUncoreFrequency(const CounterStateType& state) +{ + std::vector result; + for (auto & e : state.UFSStatus) + { + result.push_back(extract_bits(e, 0, 6) * 100000000.); + } + return result; +} + +// \brief Returns uncore die type vector +template +std::vector getUncoreDieTypes(const CounterStateType& state) +{ + std::vector result; + for (auto & e : state.UFSStatus) + { + 
result.push_back(extract_bits(e, 23, 26) << 23); + } + return result; +} /*! \brief Computes the number of retired instructions @@ -4028,6 +4155,7 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType const auto cpu_model = pcm->getCPUModel(); if (pcm->useSkylakeEvents() || cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SRF || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL @@ -4138,6 +4266,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; const auto cpu_model = pcm->getCPUModel(); if (cpu_model == PCM::SNOWRIDGE + || cpu_model == PCM::SRF || cpu_model == PCM::ADL || cpu_model == PCM::RPL || cpu_model == PCM::MTL @@ -4302,12 +4431,75 @@ uint64 getBytesWrittenToMC(const CounterStateType & before, const CounterStateTy return 0ULL; } +/*! \brief Computes number of Near Memory Hits + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of bytes +*/ +template +uint64 getNMHits(const CounterStateType & before, const CounterStateType & after) +{ + if (PCM::getInstance()->nearMemoryMetricsAvailable()) + return (after.UncNMHit - before.UncNMHit); + return 0ULL; +} + +/*! \brief Computes number of Near Memory Misses + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of NMMisses +*/ +template +uint64 getNMMisses(const CounterStateType & before, const CounterStateType & after) +{ + if (PCM::getInstance()->nearMemoryMetricsAvailable()) + return (after.UncNMMiss - before.UncNMMiss); + return 0ULL; +} + +/*! 
\brief Computes Near Memory Misses Bandwidth + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of bytes +*/ +template +uint64 getNMMissBW(const CounterStateType & before, const CounterStateType & after) +{ + if (PCM::getInstance()->nearMemoryMetricsAvailable()) + return (after.UncNMMiss - before.UncNMMiss)*64*2; + return 0ULL; +} + +/*! \brief Computes Near Memory Hit/Miss rate as a percentage + + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment + \return Number of bytes +*/ +template +double getNMHitRate(const CounterStateType & before, const CounterStateType & after) +{ + + if (PCM::getInstance()->nearMemoryMetricsAvailable()) + { + auto hit = (after.UncNMHit - before.UncNMHit); + auto miss = (after.UncNMMiss - before.UncNMMiss); + if((hit+miss) != 0 ) + return (hit*100.0/(hit+miss));} + + return 0ULL; +} + /*! \brief Computes number of bytes read from PMM memory \param before CPU counter state before the experiment \param after CPU counter state after the experiment \return Number of bytes */ + template uint64 getBytesReadFromPMM(const CounterStateType & before, const CounterStateType & after) { diff --git a/src/daemon/common.h b/src/daemon/common.h index 1f9b0102..f638473c 100644 --- a/src/daemon/common.h +++ b/src/daemon/common.h @@ -124,7 +124,7 @@ namespace PCMDaemon { float pmmRead; // PMM read traffic in MBytes/sec float pmmWrite; // PMM write traffic in MBytes/sec float total; // total traffic in MBytes/sec - float pmmMemoryModeHitRate; // PMM memory mode hit rate estimation. Metric value range is [0..1] + float memoryModeHitRate; // PMM memory mode hit rate estimation. 
Metric value range is [0..1] double dramEnergy; // energy consumed/used by DRAM memory in Joules public: @@ -135,7 +135,7 @@ namespace PCMDaemon { pmmRead(-1.0), pmmWrite(-1.0), total(-1.0), - pmmMemoryModeHitRate(-1.0), + memoryModeHitRate(-1.0), dramEnergy(0.0) {} } ALIGN(ALIGNMENT); diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index a95266f2..ffa271f8 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -641,7 +641,7 @@ namespace PCMDaemon { memory.sockets[onlineSocketsI].pmmWrite = iMC_PMM_Wr_socket[skt]; memory.sockets[onlineSocketsI].total = iMC_Rd_socket[skt] + iMC_Wr_socket[skt] + iMC_PMM_Rd_socket[skt] + iMC_PMM_Wr_socket[skt]; const auto all = memory.sockets[onlineSocketsI].total; - memory.sockets[onlineSocketsI].pmmMemoryModeHitRate = (all == 0.0) ? -1.0 : ((iMC_Rd_socket[skt] + iMC_Wr_socket[skt]) / all); // simplified approximation + memory.sockets[onlineSocketsI].memoryModeHitRate = (all == 0.0) ? -1.0 : ((iMC_Rd_socket[skt] + iMC_Wr_socket[skt]) / all); // simplified approximation if (memory.dramEnergyMetricsAvailable) { memory.sockets[onlineSocketsI].dramEnergy = getDRAMConsumedJoules(socketStatesBefore_[skt], socketStatesAfter_[skt]); diff --git a/src/dashboard.cpp b/src/dashboard.cpp index c3cfd411..01ad32d6 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -647,7 +647,39 @@ std::string getPCMDashboardJSON(const PCMDashboardType type, int ns, int nu, int dashboard.push(panel); dashboard.push(panel1); } + if(pcm->nearMemoryMetricsAvailable()){ // Near Memory statistics + y += height; + for (size_t s = 0; s < NumSockets; ++s) + { + const auto S = std::to_string(s); + + auto panel = std::make_shared(0, y, width, height,std::string("Socket") + S + " Near Memory Hit Miss", "M/s", false); + auto panel1 = std::make_shared(width, y, max_width - width, height,std::string("Current Socket") + S + "Near Memory Hit/Miss"); + for (auto& m : {"NM Hits","NM Misses","NM Miss Bw"}) + { + auto t = createTarget(m, 
influxDBUncore_Uncore_Counters(S, m) + "/1048576", prometheusCounters(S, m, false) + "/1048576"); + panel->push(t); + panel1->push(t); + } + dashboard.push(panel); + dashboard.push(panel1); + } + + auto NMpanel = std::make_shared(0, y, width, height, "Near Memory Hit Rate", "NM Hit Rate", false); + auto NMpanel1 = std::make_shared(width, y, max_width - width, height, "Near Memory HitRate"); + y += height; + for (size_t s = 0; s < NumSockets; ++s) + { + const auto S = std::to_string(s); + auto t = createTarget("Socket " + S, influxDBUncore_Uncore_Counters(S, "NM HitRate") + "/1048576", prometheusCounters(S, "NM HitRate", false) + "/1048576"); + NMpanel->push(t); + NMpanel1->push(t); + + } + dashboard.push(NMpanel); + dashboard.push(NMpanel1); + } auto panel = std::make_shared(0, y, width, height, "PMEM/DRAM Bandwidth Ratio", "PMEM/DRAM", false); auto panel1 = std::make_shared(width, y, max_width - width, height, "PMEM/DRAM Bandwidth Ratio"); y += height; diff --git a/src/mmio.cpp b/src/mmio.cpp index dcceaf9f..2d4973c6 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -87,7 +87,7 @@ WinPmemMMIORange::WinPmemMMIORange(uint64 baseAddr_, uint64 /* size_ */, bool re mutex.unlock(); } -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_) +MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) { auto hDriver = openMSRDriver(); if (hDriver != INVALID_HANDLE_VALUE) @@ -103,7 +103,10 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_) } else { - std::cerr << "MSR.sys does not support mmap operations\n"; + if (!silent) + { + std::cerr << "MSR.sys does not support mmap operations\n"; + } } } @@ -161,13 +164,16 @@ OwnMMIORange::~OwnMMIORange() #include "PCIDriverInterface.h" -MMIORange::MMIORange(uint64 physical_address, uint64 size_, bool) : +MMIORange::MMIORange(uint64 physical_address, uint64 size_, bool, bool silent) : mmapAddr(NULL), size(size_) { if (size > 4096) { - std::cerr << "PCM Error: the driver does not 
support mapping of regions > 4KB\n"; + if (!silent) + { + std::cerr << "PCM Error: the driver does not support mapping of regions > 4KB\n"; + } return; } if (physical_address) { @@ -205,7 +211,7 @@ MMIORange::~MMIORange() #elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_) : +MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) : fd(-1), mmapAddr(NULL), size(size_), @@ -215,8 +221,13 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_) : int handle = ::open("/dev/mem", oflag); if (handle < 0) { - std::cerr << "opening /dev/mem failed: errno is " << errno << " (" << strerror(errno) << ")\n"; - throw std::exception(); + std::ostringstream strstr; + strstr << "opening /dev/mem failed: errno is " << errno << " (" << strerror(errno) << ")\n"; + if (!silent) + { + std::cerr << strstr.str(); + } + throw std::runtime_error(strstr.str()); } fd = handle; @@ -225,12 +236,17 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_) : if (mmapAddr == MAP_FAILED) { - std::cerr << "mmap failed: errno is " << errno << " (" << strerror(errno) << ")\n"; + std::ostringstream strstr; + strstr << "mmap failed: errno is " << errno << " (" << strerror(errno) << ")\n"; if (1 == errno) { - std::cerr << "Try to add 'iomem=relaxed' parameter to the kernel boot command line and reboot.\n"; + strstr << "Try to add 'iomem=relaxed' parameter to the kernel boot command line and reboot.\n"; } - throw std::exception(); + if (!silent) + { + std::cerr << strstr.str(); + } + throw std::runtime_error(strstr.str()); } } @@ -271,7 +287,7 @@ MMIORange::~MMIORange() #endif -void mmio_memcpy(void * dest_, const uint64 src, const size_t n, const bool checkFailures) +void mmio_memcpy(void * dest_, const uint64 src, const size_t n, const bool checkFailures, const bool silent) { assert((src % sizeof(uint32)) == 0); assert((n % sizeof(uint32)) == 0); @@ -280,7 
+296,7 @@ void mmio_memcpy(void * dest_, const uint64 src, const size_t n, const bool chec const uint64 mapBegin = roundDownTo4K(src); const uint64 mapSize = roundUpTo4K(end) - mapBegin; uint32 * dest = (uint32 *)dest_; - MMIORange range(mapBegin, mapSize); + MMIORange range(mapBegin, mapSize, true, silent); for (uint64 i = src; i < end; i += sizeof(uint32), ++dest) { @@ -290,7 +306,10 @@ void mmio_memcpy(void * dest_, const uint64 src, const size_t n, const bool chec // a bad read std::ostringstream strstr; strstr << "Failed to read memory at 0x" << std::hex << i << std::dec << "\n"; - std::cerr << strstr.str(); + if (!silent) + { + std::cerr << strstr.str(); + } throw std::runtime_error(strstr.str()); } *dest = value; diff --git a/src/mmio.h b/src/mmio.h index ea5ed18b..2d42535b 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -115,7 +115,7 @@ class MMIORange MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true); + MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); uint32 read32(uint64 offset) { return impl->read32(offset); @@ -149,7 +149,7 @@ class MMIORange MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true); + MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); uint32 read32(uint64 offset); uint64 read64(uint64 offset); void write32(uint64 offset, uint32 val); @@ -158,6 +158,6 @@ class MMIORange }; #endif -void mmio_memcpy(void * dest, const uint64 src, const size_t n, const bool checkFailures); +void mmio_memcpy(void * dest, const uint64 src, const size_t n, const bool checkFailures, const bool silent = false); } // namespace pcm diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 6782c2ef..00544c23 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -35,6 +35,7 @@ 
using namespace pcm; #define NIS_DID 0x18D1 #define HQM_DID 0x270B + #define ROOT_BUSES_OFFSET 0xCC #define ROOT_BUSES_OFFSET_2 0xD0 @@ -139,6 +140,7 @@ static const std::map snr_sad_to_pmu_id_mapping = { }; #define HQMV2_DID 0x2710 // Hardware Queue Manager v2 +#define HQMV25_DID 0x2714 // Hardware Queue Manager v2.5 #define DSA_DID 0x0b25 // Data Streaming Accelerator (DSA) #define IAX_DID 0x0cfe // In-Memory Database Analytics Accelerator (IAX) #define QATV2_DID 0x4940 // QuickAssist (CPM) v2 @@ -810,22 +812,23 @@ bool WhitleyPlatformMapping::pciTreeDiscover(std::vectorbusno = root_bus; bdf->devno = 0x00; bdf->funcno = 0x00; - probe_pci(pci); - // Probe child devices only under PCH part. - for (uint8_t bus = pci->secondary_bus_number; bus <= pci->subordinate_bus_number; bus++) { - for (uint8_t device = 0; device < 32; device++) { - for (uint8_t function = 0; function < 8; function++) { - struct pci child_pci_dev; - child_pci_dev.bdf.busno = bus; - child_pci_dev.bdf.devno = device; - child_pci_dev.bdf.funcno = function; - if (probe_pci(&child_pci_dev)) { - pch_part.child_pci_devs.push_back(child_pci_dev); + if (probe_pci(pci)) { + // Probe child devices only under PCH part. 
+ for (uint8_t bus = pci->secondary_bus_number; bus <= pci->subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + pch_part.child_pci_devs.push_back(child_pci_dev); + } } } } + stack.parts.push_back(pch_part); } - stack.parts.push_back(pch_part); } struct iio_bifurcated_part part; @@ -835,8 +838,8 @@ bool WhitleyPlatformMapping::pciTreeDiscover(std::vectorbusno = root_bus; bdf->devno = 0x01; bdf->funcno = 0x00; - probe_pci(pci); - stack.parts.push_back(part); + if (probe_pci(pci)) + stack.parts.push_back(part); iio_on_socket.stacks.push_back(stack); continue; @@ -961,30 +964,32 @@ bool JacobsvillePlatformMapping::pciTreeDiscover(std::vectorbusno = busno; bdf->devno = part; bdf->funcno = 0; - if (stack != 0 && busno == 0) /* This is a workaround to catch some IIO stack does not exist */ + /* This is a workaround to catch some IIO stack does not exist */ + if (stack != 0 && busno == 0) pci->exist = false; else - probe_pci(pci); + (void)probe_pci(pci); } } for (uint8_t stack = 0; stack < 6; stack++) { @@ -69,8 +70,7 @@ void scanBus(int bus, const PCIDB & pciDB) pci.bdf.busno = b; pci.bdf.devno = d; pci.bdf.funcno = f; - probe_pci(&pci); - if (pci.exist) + if (probe_pci(&pci)) iio_skx.stacks[stack].parts[part].child_pci_devs.push_back(pci); } } diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index 3e17b73c..745d7baf 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -45,7 +45,7 @@ typedef struct memdata { float iMC_Wr_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; float iMC_PMM_Rd_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; float iMC_PMM_Wr_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; - float 
iMC_PMM_MemoryMode_Miss_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; + float MemoryMode_Miss_socket_chan[max_sockets][ServerUncoreCounterState::maxChannels]{}; float iMC_Rd_socket[max_sockets]{}; float iMC_Wr_socket[max_sockets]{}; float iMC_PMM_Rd_socket[max_sockets]{}; @@ -54,11 +54,13 @@ typedef struct memdata { float CXLMEM_Wr_socket_port[max_sockets][ServerUncoreCounterState::maxCXLPorts]{}; float CXLCACHE_Rd_socket_port[max_sockets][ServerUncoreCounterState::maxCXLPorts]{}; float CXLCACHE_Wr_socket_port[max_sockets][ServerUncoreCounterState::maxCXLPorts]{}; - float iMC_PMM_MemoryMode_Miss_socket[max_sockets]{}; - bool iMC_NM_hit_rate_supported{}; - float iMC_PMM_MemoryMode_Hit_socket[max_sockets]{}; + float MemoryMode_Miss_socket[max_sockets]{}; + bool NM_hit_rate_supported{}; + bool BHS_NM{}; + bool BHS{}; + float MemoryMode_Hit_socket[max_sockets]{}; bool M2M_NM_read_hit_rate_supported{}; - float iMC_NM_hit_rate[max_sockets]{}; + float NM_hit_rate[max_sockets]{}; float M2M_NM_read_hit_rate[max_sockets][max_imc_controllers]{}; float EDC_Rd_socket_chan[max_sockets][max_edc_channels]{}; float EDC_Wr_socket_chan[max_sockets][max_edc_channels]{}; @@ -301,16 +303,34 @@ void printSocketCXLBW(PCM* m, memdata_t* md, uint32 no_columns, uint32 skt) cout << "\n"; } for (uint32 port = 0; port < numPorts; ++port) { - for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- .mem --|"; + if (md->BHS) + { + for (uint32 i = skt; i < (skt + no_columns); ++i) { + cout << "|-- .mem " << setw(2) << port << " Reads (MB/s): " << setw(8) << md->CXLMEM_Rd_socket_port[i][port] << " --|"; + } + } + else + { + for (uint32 i = skt; i < (skt + no_columns); ++i) { + cout << "|-- .mem --|"; + } } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { cout << "|-- Writes(MB/s): " << setw(8) << md->CXLMEM_Wr_socket_port[i][port] << " --|"; } cout << "\n"; - for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- .cache --|"; + if 
(md->BHS) + { + for (uint32 i = skt; i < (skt + no_columns); ++i) { + cout << "|-- .cache " << setw(2) << port << " dv->hst(MB/s): " << setw(8) << md->CXLCACHE_Rd_socket_port[i][port] << " --|"; + } + } + else + { + for (uint32 i = skt; i < (skt + no_columns); ++i) { + cout << "|-- .cache --|"; + } } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { @@ -323,16 +343,16 @@ void printSocketCXLBW(PCM* m, memdata_t* md, uint32 no_columns, uint32 skt) float AD_BW(const memdata_t *md, const uint32 skt) { const auto totalPMM = md->iMC_PMM_Rd_socket[skt] + md->iMC_PMM_Wr_socket[skt]; - return (max)(totalPMM - md->iMC_PMM_MemoryMode_Miss_socket[skt], float(0.0)); + return (max)(totalPMM - md->MemoryMode_Miss_socket[skt], float(0.0)); } float PMM_MM_Ratio(const memdata_t *md, const uint32 skt) { const auto dram = md->iMC_Rd_socket[skt] + md->iMC_Wr_socket[skt]; - return md->iMC_PMM_MemoryMode_Miss_socket[skt] / dram; + return md->MemoryMode_Miss_socket[skt] / dram; } -void printSocketBWFooter(uint32 no_columns, uint32 skt, const memdata_t *md) +void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t *md) { for (uint32 i=skt; i<(skt+no_columns); ++i) { cout << "|-- NODE" << setw(2) << i << " Mem Read (MB/s) :" << setw(9) << md->iMC_Rd_socket[i] << " --|"; @@ -362,7 +382,7 @@ void printSocketBWFooter(uint32 no_columns, uint32 skt, const memdata_t *md) cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->iMC_PMM_MemoryMode_Miss_socket[i] << " --|"; + cout << "|-- NODE" << setw(2) << i << " PMM MM Bw(MB/s): " << setw(8) << md->MemoryMode_Miss_socket[i] << " --|"; } cout << "\n"; for (uint32 i = skt; i < (skt + no_columns); ++i) @@ -381,22 +401,31 @@ void printSocketBWFooter(uint32 no_columns, uint32 skt, const memdata_t *md) cout << "\n"; } } - if (md->metrics == PmemMemoryMode && md->iMC_NM_hit_rate_supported) + if ((md->metrics == PmemMemoryMode && 
md->NM_hit_rate_supported) || md->BHS_NM == true) { for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hit rate: " << setw(6) << md->iMC_NM_hit_rate[i] << " --|"; + cout << "|-- NODE" << setw(2) << i << " NM hit rate: " << setw(6) << md->NM_hit_rate[i] << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->iMC_PMM_MemoryMode_Hit_socket[i])/1000000. << " --|"; + cout << "|-- NODE" << setw(2) << i << " NM hits (M/s): " << setw(7) << (md->MemoryMode_Hit_socket[i])/1000000. << " --|"; } cout << "\n"; for (uint32 i=skt; i<(skt+no_columns); ++i) { - cout << "|-- NODE" << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->iMC_PMM_MemoryMode_Miss_socket[i])/1000000. << " --|"; + cout << "|-- NODE" << setw(2) << i << " NM misses (M/s): " << setw(7) << (md->MemoryMode_Miss_socket[i])/1000000. << " --|"; } cout << "\n"; } - if (md->metrics == PartialWrites) + if (md->BHS_NM == true) + { + for (uint32 i = skt; i < (skt + no_columns); ++i) { + cout << "|-- NODE" << setw(2) << i << " NM miss Bw(MB/s):" << setw(9) << (md->MemoryMode_Miss_socket[i] * 64. * 2.) / 1000000. << " --|"; + } + cout << "\n"; + } + if ( md->metrics == PartialWrites + && m->getCPUModel() != PCM::SRF + ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { cout << "|-- NODE" << setw(2) << i << " P. 
Write (T/s): " << dec << setw(10) << md->partial_write[i] << " --|"; @@ -506,7 +535,7 @@ void display_bandwidth(PCM *m, memdata_t *md, const uint32 no_columns, const boo printSocketBWHeader(no_columns, skt, show_channel_output); if (show_channel_output) printSocketChannelBW(m, md, no_columns, skt); - printSocketBWFooter(no_columns, skt, md); + printSocketBWFooter(m, no_columns, skt, md); printSocketCXLBW(m, md, no_columns, skt); for (uint32 i = skt; i < (skt + no_columns); i++) { @@ -639,7 +668,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const << setw(8) << md->iMC_PMM_Wr_socket[skt] << ','; }); } - if (md->metrics == PmemMemoryMode && md->iMC_NM_hit_rate_supported) + if ((md->metrics == PmemMemoryMode && md->NM_hit_rate_supported) || md->BHS_NM == true) { choose(outputType, [printSKT]() { @@ -651,9 +680,22 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const cout << "NM misses (M/s),"; }, [&md, &skt]() { - cout << setw(8) << md->iMC_NM_hit_rate[skt]<< ','; - cout << setw(8) << md->iMC_PMM_MemoryMode_Hit_socket[skt]/1000000. << ','; - cout << setw(8) << md->iMC_PMM_MemoryMode_Miss_socket[skt]/1000000. << ','; + cout << setw(8) << md->NM_hit_rate[skt]<< ','; + cout << setw(8) << md->MemoryMode_Hit_socket[skt]/1000000. << ','; + cout << setw(8) << md->MemoryMode_Miss_socket[skt]/1000000. << ','; + }); + } + if (md->BHS_NM == true) + { + choose(outputType, + [printSKT]() { + printSKT(); + }, + []() { + cout << "NM miss Bw (MB/s),"; + }, + [&md, &skt]() { + cout << setw(9) << (md->MemoryMode_Miss_socket[skt] * 64. * 2.) / 1000000. 
<< ','; }); } if (md->metrics == Pmem && md->M2M_NM_read_hit_rate_supported) @@ -683,13 +725,15 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const }, [&md, &skt]() { cout << setw(8) << AD_BW(md, skt) << ',' - << setw(8) << md->iMC_PMM_MemoryMode_Miss_socket[skt] << ',' + << setw(8) << md->MemoryMode_Miss_socket[skt] << ',' << setw(8) << PMM_MM_Ratio(md, skt) << ','; }); } if (m->HBMmemoryTrafficMetricsAvailable() == false) { - if (md->metrics == PartialWrites) + if ( md->metrics == PartialWrites + && m->getCPUModel() != PCM::SRF + ) { choose(outputType, [printSKT]() { @@ -763,17 +807,37 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const for (uint64 port = 0; port < m->getNumCXLPorts(skt); ++port) { choose(outputType, - [printSKT]() { - printSKT(2); + [printSKT, &md]() { + printSKT((md->BHS)? 4 : 2 ); }, - [&port]() { - cout - << "CXL.mem_P" << port << "Write," - << "CXL.cache_P" << port << "hst->dv,"; + [&port,&md]() { + if (md->BHS) + { + cout << "CXL.mem_P" << port << "Read," + << "CXL.mem_P" << port << "Write," + << "CXL.cache_P" << port << "dv->hst," + << "CXL.cache_P" << port << "hst->dv,"; + } + else + { + cout + << "CXL.mem_P" << port << "Write," + << "CXL.cache_P" << port << "hst->dv,"; + } }, [&md, &skt, &port]() { - cout << setw(8) << md->CXLMEM_Wr_socket_port[skt][port] << ',' - << setw(8) << md->CXLCACHE_Wr_socket_port[skt][port] << ','; + if (md->BHS) + { + cout << setw(8) << md->CXLMEM_Rd_socket_port[skt][port] << ',' + << setw(8) << md->CXLMEM_Wr_socket_port[skt][port] << ',' + << setw(8) << md->CXLCACHE_Rd_socket_port[skt][port] << ',' + << setw(8) << md->CXLCACHE_Wr_socket_port[skt][port] << ','; + } + else + { + cout << setw(8) << md->CXLMEM_Wr_socket_port[skt][port] << ',' + << setw(8) << md->CXLCACHE_Wr_socket_port[skt][port] << ','; + } }); } } @@ -841,15 +905,17 @@ void calculate_bandwidth(PCM *m, md.metrics = metrics; const auto cpu_model = m->getCPUModel(); 
md.M2M_NM_read_hit_rate_supported = (cpu_model == PCM::SKX); - md.iMC_NM_hit_rate_supported = (cpu_model == PCM::ICX); + md.NM_hit_rate_supported = (cpu_model == PCM::ICX); + md.BHS_NM = m->nearMemoryMetricsAvailable(); + md.BHS = md.BHS_NM; static bool mm_once = true; - if (metrics == Pmem && md.M2M_NM_read_hit_rate_supported == false && md.iMC_NM_hit_rate_supported == true && mm_once) + if (metrics == Pmem && md.M2M_NM_read_hit_rate_supported == false && md.NM_hit_rate_supported == true && mm_once) { cerr << "INFO: Use -mm option to monitor NM Memory Mode metrics\n"; mm_once = false; } static bool mm_once1 = true; - if (metrics == PmemMemoryMode && md.M2M_NM_read_hit_rate_supported == true && md.iMC_NM_hit_rate_supported == false && mm_once1) + if (metrics == PmemMemoryMode && md.M2M_NM_read_hit_rate_supported == true && md.NM_hit_rate_supported == false && mm_once1) { cerr << "INFO: Use -pmem option to monitor NM Memory Mode metrics\n"; mm_once1 = false; @@ -861,9 +927,9 @@ void calculate_bandwidth(PCM *m, md.iMC_Wr_socket[skt] = 0.0; md.iMC_PMM_Rd_socket[skt] = 0.0; md.iMC_PMM_Wr_socket[skt] = 0.0; - md.iMC_PMM_MemoryMode_Miss_socket[skt] = 0.0; - md.iMC_PMM_MemoryMode_Hit_socket[skt] = 0.0; - md.iMC_NM_hit_rate[skt] = 0.0; + md.MemoryMode_Miss_socket[skt] = 0.0; + md.MemoryMode_Hit_socket[skt] = 0.0; + md.NM_hit_rate[skt] = 0.0; md.EDC_Rd_socket[skt] = 0.0; md.EDC_Wr_socket[skt] = 0.0; md.partial_write[skt] = 0; @@ -885,13 +951,19 @@ void calculate_bandwidth(PCM *m, return (float)(nEvents * 64 / 1000000.0 / (elapsedTime / 1000.0)); }; + auto toRate = [&elapsedTime](const uint64 nEvents) + { + return (float)(nEvents / (elapsedTime / 1000.0)); + }; + for(uint32 skt = 0; skt < m->getNumSockets(); ++skt) { - const uint32 numChannels1 = (uint32)m->getMCChannels(skt, 0); // number of channels in the first controller + const uint32 numChannels1 = (uint32)m->getMCChannels(skt, 0); // number of channels in the first controller + if 
(m->HBMmemoryTrafficMetricsAvailable()) { - const float scalingFactor = ((float) m->getHBMCASTransferSize()) / float(64.); + const float scalingFactor = ((float)m->getHBMCASTransferSize()) / float(64.); for (uint32 channel = 0; channel < max_edc_channels; ++channel) { @@ -913,10 +985,17 @@ void calculate_bandwidth(PCM *m, { for (uint32 channel = 0; channel < max_imc_channels; ++channel) { - uint64 reads = 0, writes = 0, pmmReads = 0, pmmWrites = 0, pmmMemoryModeCleanMisses = 0, pmmMemoryModeDirtyMisses = 0; - uint64 pmmMemoryModeHits = 0; + uint64 reads = 0, writes = 0, pmmReads = 0, pmmWrites = 0, memoryModeCleanMisses = 0, memoryModeDirtyMisses = 0; + uint64 memoryModeHits = 0; reads = getMCCounter(channel, ServerUncorePMUs::EventPosition::READ, uncState1[skt], uncState2[skt]); writes = getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE, uncState1[skt], uncState2[skt]); + switch (cpu_model) + { + case PCM::SRF: + reads += getMCCounter(channel, ServerUncorePMUs::EventPosition::READ2, uncState1[skt], uncState2[skt]); + writes += getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE2, uncState1[skt], uncState2[skt]); + break; + } if (metrics == Pmem) { pmmReads = getMCCounter(channel, ServerUncorePMUs::EventPosition::PMM_READ, uncState1[skt], uncState2[skt]); @@ -924,18 +1003,18 @@ void calculate_bandwidth(PCM *m, } else if (metrics == PmemMixedMode || metrics == PmemMemoryMode) { - pmmMemoryModeCleanMisses = getMCCounter(channel, ServerUncorePMUs::EventPosition::PMM_MM_MISS_CLEAN, uncState1[skt], uncState2[skt]); - pmmMemoryModeDirtyMisses = getMCCounter(channel, ServerUncorePMUs::EventPosition::PMM_MM_MISS_DIRTY, uncState1[skt], uncState2[skt]); + memoryModeCleanMisses = getMCCounter(channel, ServerUncorePMUs::EventPosition::MM_MISS_CLEAN, uncState1[skt], uncState2[skt]); + memoryModeDirtyMisses = getMCCounter(channel, ServerUncorePMUs::EventPosition::MM_MISS_DIRTY, uncState1[skt], uncState2[skt]); } if (metrics == PmemMemoryMode) { - 
pmmMemoryModeHits = getMCCounter(channel, ServerUncorePMUs::EventPosition::NM_HIT, uncState1[skt], uncState2[skt]); + memoryModeHits = getMCCounter(channel, ServerUncorePMUs::EventPosition::NM_HIT, uncState1[skt], uncState2[skt]); } if (skipInactiveChannels && (reads + writes == 0)) { if ((metrics != Pmem) || (pmmReads + pmmWrites == 0)) { - if ((metrics != PmemMixedMode) || (pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses == 0)) + if ((metrics != PmemMixedMode) || (memoryModeCleanMisses + memoryModeDirtyMisses == 0)) { md.iMC_Rd_socket_chan[skt][channel] = -1.0; @@ -966,15 +1045,17 @@ void calculate_bandwidth(PCM *m, } else if (metrics == PmemMixedMode) { - md.iMC_PMM_MemoryMode_Miss_socket_chan[skt][channel] = toBW(pmmMemoryModeCleanMisses + 2 * pmmMemoryModeDirtyMisses); - md.iMC_PMM_MemoryMode_Miss_socket[skt] += md.iMC_PMM_MemoryMode_Miss_socket_chan[skt][channel]; + md.MemoryMode_Miss_socket_chan[skt][channel] = toBW(memoryModeCleanMisses + 2 * memoryModeDirtyMisses); + md.MemoryMode_Miss_socket[skt] += md.MemoryMode_Miss_socket_chan[skt][channel]; } else if (metrics == PmemMemoryMode) { - md.iMC_PMM_MemoryMode_Miss_socket[skt] += (float)((pmmMemoryModeCleanMisses + pmmMemoryModeDirtyMisses) / (elapsedTime / 1000.0)); - md.iMC_PMM_MemoryMode_Hit_socket[skt] += (float)((pmmMemoryModeHits) / (elapsedTime / 1000.0)); + md.MemoryMode_Miss_socket[skt] += toRate(memoryModeCleanMisses + memoryModeDirtyMisses); + md.MemoryMode_Hit_socket[skt] += toRate(memoryModeHits); } - else + else if ( + cpu_model != PCM::SRF + ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); } @@ -1000,9 +1081,9 @@ void calculate_bandwidth(PCM *m, { md.iMC_PMM_Rd_socket[skt] += toBW(pmmReads); } - else for(uint32 c = 0; c < max_imc_controllers; ++c) + else for (uint32 c = 0; c < max_imc_controllers; ++c) { - md.iMC_PMM_Rd_socket[skt] += toBW(getM2MCounter(c, 
ServerUncorePMUs::EventPosition::PMM_READ, uncState1[skt],uncState2[skt])); + md.iMC_PMM_Rd_socket[skt] += toBW(getM2MCounter(c, ServerUncorePMUs::EventPosition::PMM_READ, uncState1[skt], uncState2[skt])); } const int64 pmmWrites = getFreeRunningCounter(ServerUncoreCounterState::PMMWrites, uncState1[skt], uncState2[skt]); @@ -1010,31 +1091,50 @@ void calculate_bandwidth(PCM *m, { md.iMC_PMM_Wr_socket[skt] += toBW(pmmWrites); } - else for(uint32 c = 0; c < max_imc_controllers; ++c) + else for (uint32 c = 0; c < max_imc_controllers; ++c) { - md.iMC_PMM_Wr_socket[skt] += toBW(getM2MCounter(c, ServerUncorePMUs::EventPosition::PMM_WRITE, uncState1[skt],uncState2[skt]));; + md.iMC_PMM_Wr_socket[skt] += toBW(getM2MCounter(c, ServerUncorePMUs::EventPosition::PMM_WRITE, uncState1[skt], uncState2[skt]));; } } if (metrics == Pmem) { - for(uint32 c = 0; c < max_imc_controllers; ++c) + for (uint32 c = 0; c < max_imc_controllers; ++c) { - if(md.M2M_NM_read_hit_rate[skt][c] != 0.0) + if (md.M2M_NM_read_hit_rate[skt][c] != 0.0) { - md.M2M_NM_read_hit_rate[skt][c] = ((float)getM2MCounter(c, ServerUncorePMUs::EventPosition::NM_HIT, uncState1[skt],uncState2[skt]))/ md.M2M_NM_read_hit_rate[skt][c]; + md.M2M_NM_read_hit_rate[skt][c] = ((float)getM2MCounter(c, ServerUncorePMUs::EventPosition::NM_HIT, uncState1[skt], uncState2[skt])) / md.M2M_NM_read_hit_rate[skt][c]; } } } - const auto all = md.iMC_PMM_MemoryMode_Miss_socket[skt] + md.iMC_PMM_MemoryMode_Hit_socket[skt]; - if (metrics == PmemMemoryMode && all != 0.0) + if (md.BHS_NM) { - md.iMC_NM_hit_rate[skt] = md.iMC_PMM_MemoryMode_Hit_socket[skt] / all; + for (uint32 c = 0; c < max_imc_controllers; ++c) + { + md.MemoryMode_Hit_socket[skt] += toRate(getM2MCounter(c, ServerUncorePMUs::EventPosition::NM_HIT, uncState1[skt], uncState2[skt])); + md.MemoryMode_Miss_socket[skt] += toRate(getM2MCounter(c, ServerUncorePMUs::EventPosition::MM_MISS_CLEAN, uncState1[skt], uncState2[skt])); + md.MemoryMode_Miss_socket[skt] += 
toRate(getM2MCounter(c, ServerUncorePMUs::EventPosition::MM_MISS_DIRTY, uncState1[skt], uncState2[skt])); + } + } + const auto all = md.MemoryMode_Miss_socket[skt] + md.MemoryMode_Hit_socket[skt]; + if ((metrics == PmemMemoryMode || md.BHS_NM == true) && all != 0.0) + { + md.NM_hit_rate[skt] = md.MemoryMode_Hit_socket[skt] / all; } for (size_t p = 0; p < m->getNumCXLPorts(skt); ++p) { - md.CXLMEM_Wr_socket_port[skt][p] = CXLBWWrScalingFactor * toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_TxC_MEM, uncState1[skt], uncState2[skt])); - md.CXLCACHE_Wr_socket_port[skt][p] = CXLBWWrScalingFactor * toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_TxC_CACHE, uncState1[skt], uncState2[skt])); + if (md.BHS) + { + md.CXLMEM_Rd_socket_port[skt][p] = toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_RxC_MEM, uncState1[skt], uncState2[skt])); + md.CXLMEM_Wr_socket_port[skt][p] = toBW(getCXLDPCounter((uint32)p, PCM::EventPosition::CXL_TxC_MEM, uncState1[skt], uncState2[skt])); + md.CXLCACHE_Rd_socket_port[skt][p] = toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_RxC_CACHE, uncState1[skt], uncState2[skt])); + md.CXLCACHE_Wr_socket_port[skt][p] = toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_TxC_CACHE, uncState1[skt], uncState2[skt])); + } + else + { + md.CXLMEM_Wr_socket_port[skt][p] = CXLBWWrScalingFactor * toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_TxC_MEM, uncState1[skt], uncState2[skt])); + md.CXLCACHE_Wr_socket_port[skt][p] = CXLBWWrScalingFactor * toBW(getCXLCMCounter((uint32)p, PCM::EventPosition::CXL_TxC_CACHE, uncState1[skt], uncState2[skt])); + } } } diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index c18f21d7..37ab8557 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -96,6 +96,8 @@ void print_usage(const string & progname) IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool print_additional_info, uint32 delay) { switch (m->getCPUModel()) { + case PCM::SRF: + return new 
BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SPR: case PCM::EMR: return new EagleStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); diff --git a/src/pcm-pcie.h b/src/pcm-pcie.h index 533ffa66..0021047c 100644 --- a/src/pcm-pcie.h +++ b/src/pcm-pcie.h @@ -343,6 +343,138 @@ void LegacyPlatform::printAggregatedEvents() } } +// BHS + +class BirchStreamPlatform: public LegacyPlatform +{ +public: + BirchStreamPlatform(PCM *m, bool csv, bool bandwidth, bool verbose, uint32 delay) : + LegacyPlatform( {"PCIRdCur", "ItoM", "ItoMCacheNear", "UCRdF", "WiL", "WCiL", "WCiLF"}, + { + {0xC8F3FE00000435, 0xC8F3FD00000435, 0xCC43FE00000435, 0xCC43FD00000435}, + {0xCD43FE00000435, 0xCD43FD00000435, 0xC877DE00000135, 0xC87FDE00000135}, + {0xC86FFE00000135, 0xC867FE00000135,}, + }, + m, csv, bandwidth, verbose, delay) + { + }; + +private: + enum eventIdx { + PCIRdCur, + ItoM, + ItoMCacheNear, + UCRdF, + WiL, + WCiL, + WCiLF + }; + + enum Events { + PCIRdCur_miss, + PCIRdCur_hit, + ItoM_miss, + ItoM_hit, + ItoMCacheNear_miss, + ItoMCacheNear_hit, + UCRdF_miss, + WiL_miss, + WCiL_miss, + WCiLF_miss, + eventLast + }; + + virtual uint64 getReadBw(uint socket, eventFilter filter); + virtual uint64 getWriteBw(uint socket, eventFilter filter); + virtual uint64 getReadBw(); + virtual uint64 getWriteBw(); + virtual uint64 event(uint socket, eventFilter filter, uint idx); +}; + +uint64 BirchStreamPlatform::event(uint socket, eventFilter filter, uint idx) +{ + uint64 event = 0; + switch (idx) + { + case PCIRdCur: + if (filter == TOTAL) + event = eventSample[socket][PCIRdCur_miss] + + eventSample[socket][PCIRdCur_hit]; + else if (filter == MISS) + event = eventSample[socket][PCIRdCur_miss]; + else if (filter == HIT) + event = eventSample[socket][PCIRdCur_hit]; + break; + case ItoM: + if (filter == TOTAL) + event = eventSample[socket][ItoM_miss] + + eventSample[socket][ItoM_hit]; + else if (filter == MISS) + event = 
eventSample[socket][ItoM_miss]; + else if (filter == HIT) + event = eventSample[socket][ItoM_hit]; + break; + case ItoMCacheNear: + if (filter == TOTAL) + event = eventSample[socket][ItoMCacheNear_miss] + + eventSample[socket][ItoMCacheNear_hit]; + else if (filter == MISS) + event = eventSample[socket][ItoMCacheNear_miss]; + else if (filter == HIT) + event = eventSample[socket][ItoMCacheNear_hit]; + break; + case UCRdF: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][UCRdF_miss]; + break; + case WiL: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][WiL_miss]; + break; + case WCiL: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][WCiL_miss]; + break; + case WCiLF: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][WCiLF_miss]; + break; + default: + break; + } + return event; +} + +uint64 BirchStreamPlatform::getReadBw(uint socket, eventFilter filter) +{ + uint64 readBw = event(socket, filter, PCIRdCur); + return (readBw * 64ULL); +} + +uint64 BirchStreamPlatform::getWriteBw(uint socket, eventFilter filter) +{ + uint64 writeBw = event(socket, filter, ItoM) + + event(socket, filter, ItoMCacheNear); + return (writeBw * 64ULL); +} +uint64 BirchStreamPlatform::getReadBw() +{ + uint64 readBw = 0; + for (uint socket = 0; socket < m_socketCount; socket++) + readBw += (event(socket, TOTAL, PCIRdCur)); + return (readBw * 64ULL); +} + +uint64 BirchStreamPlatform::getWriteBw() +{ + uint64 writeBw = 0; + for (uint socket = 0; socket < m_socketCount; socket++) + writeBw += (event(socket, TOTAL, ItoM) + + event(socket, TOTAL, ItoMCacheNear)); + return (writeBw * 64ULL); +} + + //SPR class EagleStreamPlatform: public LegacyPlatform { diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 30bf86e7..b25461d3 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -602,8 +602,10 @@ AddEventStatus addEventFromDB(PCM::RawPMUConfigs& curPMUConfigs, string fullEven static std::map pmuNameMap = { 
{std::string("cbo"), std::string("cha")}, + {std::string("b2cmi"), std::string("m2m")}, {std::string("upi"), std::string("xpi")}, {std::string("upi ll"), std::string("xpi")}, + {std::string("b2upi"), std::string("m3upi")}, {std::string("qpi"), std::string("xpi")}, {std::string("qpi ll"), std::string("xpi")} }; diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index 44bddfb0..72a89ec5 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -424,8 +424,15 @@ class JSONPrinter : Visitor void printUncoreCounterState( SocketCounterState const& before, SocketCounterState const& after ) { startObject( "Uncore Counters", BEGIN_OBJECT ); + PCM* pcm = PCM::getInstance(); printCounter( "DRAM Writes", getBytesWrittenToMC ( before, after ) ); printCounter( "DRAM Reads", getBytesReadFromMC ( before, after ) ); + if(pcm->nearMemoryMetricsAvailable()){ + printCounter( "NM HitRate", getNMHitRate ( before, after ) ); + printCounter( "NM Hits", getNMHits ( before, after ) ); + printCounter( "NM Misses", getNMMisses ( before, after ) ); + printCounter( "NM Miss Bw", getNMMissBW ( before, after ) ); + } printCounter( "Persistent Memory Writes", getBytesWrittenToPMM ( before, after ) ); printCounter( "Persistent Memory Reads", getBytesReadFromPMM ( before, after ) ); printCounter( "Embedded DRAM Writes", getBytesWrittenToEDC ( before, after ) ); @@ -704,9 +711,16 @@ class PrometheusPrinter : Visitor } void printUncoreCounterState( SocketCounterState const& before, SocketCounterState const& after ) { + PCM* pcm = PCM::getInstance(); addToHierarchy( "source=\"uncore\"" ); printCounter( "DRAM Writes", getBytesWrittenToMC ( before, after ) ); printCounter( "DRAM Reads", getBytesReadFromMC ( before, after ) ); + if(pcm->nearMemoryMetricsAvailable()){ + printCounter( "NM Hits", getNMHits ( before, after ) ); + printCounter( "NM Misses", getNMMisses ( before, after ) ); + printCounter( "NM Miss Bw", getNMMissBW ( before, after ) ); + printCounter( "NM 
HitRate", getNMHitRate ( before, after ) ); + } printCounter( "Persistent Memory Writes", getBytesWrittenToPMM ( before, after ) ); printCounter( "Persistent Memory Reads", getBytesReadFromPMM ( before, after ) ); printCounter( "Embedded DRAM Writes", getBytesWrittenToEDC ( before, after ) ); diff --git a/src/pcm-tpmi.cpp b/src/pcm-tpmi.cpp index 1d39d86c..8d0ef12c 100644 --- a/src/pcm-tpmi.cpp +++ b/src/pcm-tpmi.cpp @@ -21,12 +21,14 @@ using namespace pcm; void print_usage(const char * progname) { - std::cout << "Usage " << progname << " [-w value] [-d] [-b low:high] ID offset\n\n"; + std::cout << "Usage " << progname << " [-w value] [-d] [-b low:high] [-e entries] ID offset\n\n"; std::cout << " Reads/writes TPMI (Topology Aware Register and PM Capsule Interface) register \n"; std::cout << " ID : TPMI ID\n"; std::cout << " offset : register offset\n"; std::cout << " -w value : write the value before reading \n"; std::cout << " -b low:high : read or write only low..high bits of the register\n"; + std::cout << " -e entries : perform read/write on specified entries (default is all entries)\n"; + std::cout << " (examples: -e 10 -e 10-11 -e 4,6,12-20,6)\n"; std::cout << " -d : output all numbers in dec (default is hex)\n"; std::cout << " -v : verbose ouput\n"; std::cout << " --version : print application version\n"; @@ -48,11 +50,11 @@ int mainThrows(int argc, char * argv[]) uint64 value = 0; bool write = false; bool dec = false; - bool verbose = false; std::pair bits{-1, -1}; + std::list entries; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "w:dvb:")) != -1) + while ((my_opt = getopt(argc, argv, "w:dvb:e:")) != -1) { switch (my_opt) { @@ -64,11 +66,14 @@ int mainThrows(int argc, char * argv[]) dec = true; break; case 'v': - verbose = true; + TPMIHandle::setVerbose(true); break; case 'b': bits = parseBitsParameter(optarg); break; + case 'e': + entries = extract_integer_list(optarg); + break; default: print_usage(argv[0]); return -1; @@ -98,88 +103,48 @@ int 
mainThrows(int argc, char * argv[]) return -1; } #endif - - processDVSEC([](const VSEC & vsec) - { - return vsec.fields.cap_id == 0xb // Vendor Specific DVSEC - && vsec.fields.vsec_id == 0x42; // TPMI PM_Features - }, [&](const uint64 bar, const VSEC & vsec) + + try + { + for (size_t i = 0; i < TPMIHandle::getNumInstances(); ++i) { - struct PFS + TPMIHandle h(i, requestedID, requestedRelativeOffset, !write); + auto one = [&](const size_t p) { - uint64 TPMI_ID:8; - uint64 NumEntries:8; - uint64 EntrySize:16; - uint64 CapOffset:16; - uint64 Attribute:2; - uint64 Reserved:14; + if (!dec) + std::cout << std::hex << std::showbase; + readOldValueHelper(bits, value, write, [&h, &p](uint64& old_value) + { old_value = h.read64(p); return true; }); + if (write) + { + std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i << "\n"; + h.write64(p, value); + } + value = h.read64(p); + extractBitsPrintHelper(bits, value, dec); + std::cout << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << " in instance " << i << "\n\n"; }; - static_assert(sizeof(PFS) == sizeof(uint64), "sizeof(PFS) != sizeof(uint64)"); - assert(vsec.fields.EntrySize == 2); - std::vector pfsArray(vsec.fields.NumEntries); - pcm::mmio_memcpy(&(pfsArray[0]), bar + vsec.fields.Address, vsec.fields.NumEntries * sizeof(PFS), true); - for (const auto & pfs : pfsArray) + if (entries.empty()) { - if (verbose) + for (size_t p = 0; p < h.getNumEntries(); ++p) { - std::cout << "PFS" << - "\t TPMI_ID: " << pfs.TPMI_ID << - "\t NumEntries: " << pfs.NumEntries << - "\t EntrySize: " << pfs.EntrySize << - "\t CapOffset: " << pfs.CapOffset << - "\t Attribute: " << pfs.Attribute << - "\n"; + entries.push_back(p); } - for (uint64 p = 0; p < pfs.NumEntries; ++p) + } + for (const size_t p : entries) + { + if (p < h.getNumEntries()) { - uint32 reg0 = 0; - const auto addr = bar + vsec.fields.Address + 
pfs.CapOffset * 1024ULL + p * pfs.EntrySize * sizeof(uint32); - mmio_memcpy(®0, addr, sizeof(uint32), false); - if (reg0 == ~0U) - { - if (verbose) - { - std::cout << "invalid entry " << p << "\n"; - } - } - else if (pfs.TPMI_ID == requestedID) - { - if (verbose) - { - std::cout << "Entry "<< p << std::hex; - for (uint64 i_offset = 0; i_offset < pfs.EntrySize * sizeof(uint32); i_offset += sizeof(uint64)) - { - uint64 reg = 0; - mmio_memcpy(®, addr + i_offset, sizeof(uint64), false); - std::cout << " register "<< i_offset << " = " << reg; - } - std::cout << std::dec << "\n"; - } - try { - const auto requestedAddr = addr + requestedRelativeOffset; - const auto baseAddr = roundDownTo4K(requestedAddr); - const auto baseOffset = requestedAddr - baseAddr; - MMIORange range(baseAddr, 4096ULL, !write); - if (!dec) std::cout << std::hex << std::showbase; - readOldValueHelper(bits, value, write, [&range, &baseOffset](uint64 & old_value){ old_value = range.read64(baseOffset); return true; }); - if (write) - { - std::cout << " Writing " << value << " to TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n"; - range.write64(baseOffset, value); - } - value = range.read64(baseOffset); - extractBitsPrintHelper(bits, value, dec); - std::cout << " from TPMI ID " << requestedID << "@" << requestedRelativeOffset << " for entry " << p << "\n\n"; - } - catch (std::exception& e) - { - std::cerr << "Error accessing registers: " << e.what() << "\n"; - std::cerr << "Please check if the program can access MSR/PCICFG drivers.\n"; - } - } + one(p); } } - }); + } + } + catch (std::exception &e) + { + std::cerr << "Error accessing registers: " << e.what() << "\n"; + std::cerr << "Please check if the program can access MSR/PCICFG drivers.\n"; + } return 0; } diff --git a/src/pcm.cpp b/src/pcm.cpp index deec2ef7..5eb28782 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -533,7 +533,33 @@ void print_output(PCM * m, cout << setNextColor() << " LLCRDMISSLAT (ns)|"; if 
(m->uncoreFrequencyMetricAvailable()) cout << setNextColor() << " UncFREQ (Ghz)|"; + + auto printCentered = [](const std::string& str, int width) + { + int len = str.length(); + if(width < len) { + std::cout << str; + } else { + int diff = width - len; + int pad1 = diff/2; + int pad2 = diff - pad1; + std::cout << std::string(pad1, ' ') << str << std::string(pad2, ' '); + } + }; + const std::vector uncoreDieTypes{getUncoreDieTypes(sktstate2[0])}; + if (uncoreDieTypes.empty() == false) + { + cout << setNextColor() << " Unc(Ghz) "; + for (auto & d: uncoreDieTypes) + { + cout << setNextColor(); + printCentered(UncoreCounterState::getDieTypeStr(d), 7); + cout << " "; + } + std::cout << "|" ; + } cout << resetColor() << "\n"; + cout << longDiv; for (uint32 i = 0; i < m->getNumSockets(); ++i) { @@ -576,6 +602,17 @@ void print_output(PCM * m, cout << setNextColor() << " "; cout << setw(4) << getAverageUncoreFrequencyGhz(sktstate1[i], sktstate2[i]); } + const std::vector uncoreFrequencies{getUncoreFrequency(sktstate2[i])}; + assert(uncoreFrequencies.size() == uncoreDieTypes.size()); + + if (uncoreFrequencies.empty() == false) + { + cout << setNextColor() << " "; + for (auto & d: uncoreFrequencies) + { + cout << setNextColor() << " " << std::setw(4) << d/1e9 << " "; + } + } cout << resetColor() << "\n"; } cout << longDiv; @@ -824,6 +861,14 @@ void print_csv_header(PCM * m, header = "UncFREQ (Ghz)"; print_csv_header_helper(header, m->getNumSockets()); } + for (uint32 s = 0; s < m->getNumSockets(); ++s) + { + for (size_t die = 0; die < m->getNumUFSDies(); ++die) + { + header = "UncFREQ Die " + std::to_string(die) + " (Ghz)"; + print_csv_header_helper(header); + } + } } if (show_core_output) @@ -965,30 +1010,41 @@ void print_csv_header(PCM * m, cout << "C" << s << "res%,"; } + auto printSKT = [] (const uint32 i, const uint32 count = 1) + { + for (uint32 j = 0; j < count; ++j) + { + cout << "SKT" << i << ","; + } + }; if (m->packageEnergyMetricsAvailable()) { for (uint32 i = 0; 
i < m->getNumSockets(); ++i) - cout << "SKT" << i << ","; + printSKT(i); } if (m->ppEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) - cout << "SKT" << i << "," << "SKT" << i << ","; + printSKT(i, 2); } if (m->dramEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) - cout << "SKT" << i << ","; + printSKT(i); } if (m->LLCReadMissLatencyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) - cout << "SKT" << i << ","; + printSKT(i); } if (m->uncoreFrequencyMetricAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) - cout << "SKT" << i << ","; + printSKT(i); + } + for (uint32 i = 0; i < m->getNumSockets(); ++i) + { + printSKT(i, m->getNumUFSDies()); } } @@ -1245,6 +1301,15 @@ void print_csv(PCM * m, for (uint32 i = 0; i < m->getNumSockets(); ++i) cout << getAverageUncoreFrequencyGhz(sktstate1[i], sktstate2[i]) << ","; } + for (uint32 i = 0; i < m->getNumSockets(); ++i) + { + const auto freqs = getUncoreFrequency(sktstate2[i]); + assert(freqs.size() == (size_t)m->getNumUFSDies()); + for (auto & f : freqs) + { + cout << f/1e9 << ","; + } + } } if (show_core_output) diff --git a/src/tpmi.cpp b/src/tpmi.cpp new file mode 100644 index 00000000..5b2cfee9 --- /dev/null +++ b/src/tpmi.cpp @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2023, Intel Corporation +// written by Roman Dementiev +// + +#include "tpmi.h" +#include "pci.h" +#include "utils.h" +#include +#include +#include +#ifdef __linux__ +#include +#include +#endif + +namespace pcm { + +constexpr uint32 TPMIInvalidValue = ~0U; + +bool TPMIverbose = false; + +class PFSInstances +{ +public: + // [TPMI ID][entry] -> base address + typedef std::unordered_map > PFSMapType; + // [PFS instance][TPMI ID][entry] -> base address + typedef std::vector PFSInstancesType; +private: + static std::shared_ptr PFSInstancesSingleton; +public: + static PFSInstancesType & get() + { + if (PFSInstancesSingleton.get()) + { + return 
*PFSInstancesSingleton.get(); + } + // PFSInstancesSingleton not initialized, let us initialize it + auto PFSInstancesSingletonInit = std::make_shared(); + + processDVSEC([](const VSEC & vsec) + { + return vsec.fields.cap_id == 0xb // Vendor Specific DVSEC + && vsec.fields.vsec_id == 0x42; // TPMI PM_Features + }, [&](const uint64 bar, const VSEC & vsec) + { + struct PFS + { + uint64 TPMI_ID:8; + uint64 NumEntries:8; + uint64 EntrySize:16; + uint64 CapOffset:16; + uint64 Attribute:2; + uint64 Reserved:14; + }; + static_assert(sizeof(PFS) == sizeof(uint64), "sizeof(PFS) != sizeof(uint64)"); + assert(vsec.fields.EntrySize == 2); + std::vector pfsArray(vsec.fields.NumEntries); + try { + mmio_memcpy(&(pfsArray[0]), bar + vsec.fields.Address, vsec.fields.NumEntries * sizeof(PFS), true, true); + } catch (std::runtime_error & e) + { + std::cerr << "Can't read PFS\n"; + std::cerr << e.what(); + } + PFSInstancesSingletonInit->push_back(PFSMapType()); + for (const auto & pfs : pfsArray) + { + if (TPMIverbose) + { + std::cout << "PFS" << + "\t TPMI_ID: " << pfs.TPMI_ID << + "\t NumEntries: " << pfs.NumEntries << + "\t EntrySize: " << pfs.EntrySize << + "\t CapOffset: " << pfs.CapOffset << + "\t Attribute: " << pfs.Attribute << + "\n"; + } + for (uint64 p = 0; p < pfs.NumEntries; ++p) + { + uint32 reg0 = 0; + const auto addr = bar + vsec.fields.Address + pfs.CapOffset * 1024ULL + p * pfs.EntrySize * sizeof(uint32); + try { + mmio_memcpy(®0, addr, sizeof(uint32), false, true); + } catch (std::runtime_error & e) + { + if (TPMIverbose) + { + std::cout << "can't read entry " << p << "\n"; + std::cout << e.what(); + } + PFSInstancesSingletonInit->back()[pfs.TPMI_ID].push_back(addr); + continue; + } + if (reg0 == TPMIInvalidValue) + { + if (TPMIverbose) + { + std::cout << "invalid entry " << p << "\n"; + } + } + else + { + if (TPMIverbose) + { + std::cout << "Entry "<< p << std::hex; + for (uint64 i_offset = 0; i_offset < pfs.EntrySize * sizeof(uint32); i_offset += sizeof(uint64)) + 
{ + uint64 reg = 0; + mmio_memcpy(®, addr + i_offset, sizeof(uint64), false); + std::cout << " register "<< i_offset << " = " << reg; + } + std::cout << std::dec << "\n"; + } + PFSInstancesSingletonInit->back()[pfs.TPMI_ID].push_back(addr); + } + } + } + }); + PFSInstancesSingleton = PFSInstancesSingletonInit; + return *PFSInstancesSingleton.get(); + } +}; + +std::shared_ptr PFSInstances::PFSInstancesSingleton; + +class TPMIHandleMMIO : public TPMIHandleInterface +{ + TPMIHandleMMIO(const TPMIHandleMMIO&) = delete; + TPMIHandleMMIO& operator = (const TPMIHandleMMIO&) = delete; + struct Entry + { + std::shared_ptr range; + size_t offset; + }; + std::vector entries; +public: + static size_t getNumInstances(); + static void setVerbose(const bool); + TPMIHandleMMIO(const size_t instance_, const size_t ID_, const size_t offset_, const bool readonly_ = true); + size_t getNumEntries() const override + { + return entries.size(); + } + uint64 read64(size_t entryPos) override; + void write64(size_t entryPos, uint64 val) override; +}; + +size_t TPMIHandleMMIO::getNumInstances() +{ + return PFSInstances::get().size(); +} + +void TPMIHandle::setVerbose(const bool v) +{ + TPMIverbose = v; +} + +TPMIHandleMMIO::TPMIHandleMMIO(const size_t instance_, const size_t ID_, const size_t requestedRelativeOffset, const bool readonly_) +{ + auto & pfsInstances = PFSInstances::get(); + assert(instance_ < pfsInstances.size()); + for (const auto & addr: pfsInstances[instance_][ID_]) + { + const auto requestedAddr = addr + requestedRelativeOffset; + const auto baseAddr = roundDownTo4K(requestedAddr); + const auto baseOffset = requestedAddr - baseAddr; + Entry e; + e.range = std::make_shared(baseAddr, 4096ULL, readonly_); + e.offset = baseOffset; + entries.push_back(e); + } +} + +uint64 TPMIHandleMMIO::read64(size_t entryPos) +{ + assert(entryPos < entries.size()); + return entries[entryPos].range->read64(entries[entryPos].offset); +} + +void TPMIHandleMMIO::write64(size_t entryPos, uint64 val) 
+{ + assert(entryPos < entries.size()); + entries[entryPos].range->write64(entries[entryPos].offset, val); +} + +#ifdef __linux__ +class TPMIHandleDriver : public TPMIHandleInterface +{ + TPMIHandleDriver(const TPMIHandleDriver&) = delete; + TPMIHandleDriver& operator = (const TPMIHandleDriver&) = delete; + static std::vector instancePaths; + typedef std::unordered_map TPMI_IDPathMap; + static std::vector AllIDPaths; + static int available; + static bool isAvailable(); + const size_t instance; + const size_t ID; + const size_t offset; + const bool readonly; + size_t nentries; + struct TPMIEntry { + unsigned int offset{0}; + std::vector data; + }; + + size_t findValidIndex(const std::vector & entries, const size_t & entryPos) + { + size_t validIndex = 0; + for (size_t i = 0; i < entries.size(); ++i) + { + if (entries[i].data.empty() || entries[i].data[0] == TPMIInvalidValue) + { + // invalid, skip it + continue; + } + if (validIndex == entryPos) + { + // found the right instance + return i; + } + ++validIndex; + } + assert(0 && "TPMIHandleDriver: entryPos not found"); + return 0; + } + std::vector readTPMIFile(std::string filePath) + { + filePath += "/mem_dump"; + std::vector entries; + std::ifstream file(filePath); + std::string line; + + if (!file.is_open()) { + std::cerr << "Error opening file: " << filePath << std::endl; + return entries; + } + + TPMIEntry currentEntry; + while (getline(file, line)) { + if (line.find("TPMI Instance:") != std::string::npos) { + // If we have a previous instance, push it back to the vector + if (!currentEntry.data.empty()) { + entries.push_back(currentEntry); + currentEntry.data.clear(); + } + + std::istringstream iss(line); + std::string temp; + iss >> temp >> temp >> temp; // Skip "TPMI Instance:" + iss >> temp; // Skip entry number + iss >> temp >> std::hex >> currentEntry.offset; // Read offset + } else { + std::istringstream iss(line); + std::string address; + iss >> address; // Skip the address part + + uint32_t value; + 
while (iss >> std::hex >> value) { + currentEntry.data.push_back(value); + } + } + } + + // Push the last instance if it exists + if (!currentEntry.data.empty()) { + entries.push_back(currentEntry); + } + + return entries; + } +public: + static size_t getNumInstances(); + TPMIHandleDriver(const size_t instance_, const size_t ID_, const size_t offset_, const bool readonly_ = true) : + instance(instance_), + ID(ID_), + offset(offset_), + readonly(readonly_), + nentries(0) + { + assert(available > 0); + assert(instance < getNumInstances()); + const auto entries = readTPMIFile(AllIDPaths[instance][ID]); + for (auto & e: entries) + { + if (e.data.empty() == false && e.data[0] != TPMIInvalidValue) + { + // count valid entries + ++nentries; + } + } + } + size_t getNumEntries() const override + { + assert(available > 0); + return nentries; + } + uint64 read64(size_t entryPos) override + { + assert(available > 0); + assert(instance < getNumInstances()); + const auto entries = readTPMIFile(AllIDPaths[instance][ID]); + size_t i = findValidIndex(entries, entryPos); + cvt_ds result; + const auto i4 = offset / 4; + assert(i4 + 1 < entries[i].data.size()); + result.ui32.low = entries[i].data[i4]; + result.ui32.high = entries[i].data[i4 + 1]; + return result.ui64; + } + void write64(size_t entryPos, uint64 val) override + { + assert(available > 0); + assert(instance < getNumInstances()); + const auto entries = readTPMIFile(AllIDPaths[instance][ID]); + size_t i = findValidIndex(entries, entryPos); + cvt_ds out; + out.ui64 = val; + const auto path = AllIDPaths[instance][ID] + "/mem_write"; + writeSysFS(path.c_str(), std::to_string(i) + "," + std::to_string(offset) + "," + std::to_string(out.ui32.low)); + writeSysFS(path.c_str(), std::to_string(i) + "," + std::to_string(offset + 4) + "," + std::to_string(out.ui32.high)); + } +}; + +int TPMIHandleDriver::available = -1; +std::vector TPMIHandleDriver::instancePaths; +std::vector TPMIHandleDriver::AllIDPaths; + +bool 
TPMIHandleDriver::isAvailable() +{ + if (available < 0) // not initialized yet + { + auto findPathsFromPattern = [](const char* pattern) + { + std::vector result; + glob_t glob_result; + memset(&glob_result, 0, sizeof(glob_result)); + if (glob(pattern, GLOB_TILDE, nullptr, &glob_result) == 0) + { + for (size_t i = 0; i < glob_result.gl_pathc; ++i) + { + result.push_back(glob_result.gl_pathv[i]); + } + } + globfree(&glob_result); + return result; + }; + instancePaths = findPathsFromPattern("/sys/kernel/debug/tpmi-*"); + std::sort(instancePaths.begin(), instancePaths.end()); + for (size_t i = 0; i < instancePaths.size(); ++i) + { + // std::cout << instancePaths[i] << std::endl; + std::string prefix = instancePaths[i] + "/tpmi-id-"; + std::vector IDPaths = findPathsFromPattern((prefix + "*").c_str()); + TPMI_IDPathMap idMap; + for (auto & p : IDPaths) + { + const auto id = read_number((std::string("0x") + p.substr(prefix.size())).c_str()); + // std::cout << p << " -> " << id << std::endl; + idMap[id] = p; + std::ifstream mem_dump((p + "/mem_dump").c_str()); + std::ifstream mem_write((p + "/mem_write").c_str()); + if (mem_dump.good() && mem_write.good()) + { + available = 1; + } + } + AllIDPaths.push_back(idMap); + } + if (available < 0) + { + available = 0; + } + if (safe_getenv("PCM_NO_TPMI_DRIVER") == std::string("1")) + { + available = 0; + } + } + return available > 0; +} + +size_t TPMIHandleDriver::getNumInstances() +{ + // std::cout << "isAvailable: " << isAvailable() << std::endl; + if (isAvailable()) + { + return AllIDPaths.size(); + } + return 0; +} + +#endif + +size_t TPMIHandle::getNumInstances() +{ + #ifdef __linux__ + const auto tpmiNInstances = TPMIHandleDriver::getNumInstances(); + if (tpmiNInstances) + { + return tpmiNInstances; + } + #endif + return TPMIHandleMMIO::getNumInstances(); +} + +TPMIHandle::TPMIHandle(const size_t instance_, const size_t ID_, const size_t requestedRelativeOffset, const bool readonly_) +{ + #ifdef __linux__ + const auto 
tpmiNInstances = TPMIHandleDriver::getNumInstances(); + if (tpmiNInstances) + { + impl = std::make_shared(instance_, ID_, requestedRelativeOffset, readonly_); + return; + } + #endif + impl = std::make_shared(instance_, ID_, requestedRelativeOffset, readonly_); +} + +size_t TPMIHandle::getNumEntries() const +{ + assert(impl.get());; + return impl->getNumEntries(); +} + +uint64 TPMIHandle::read64(size_t entryPos) +{ + assert(impl.get()); + return impl->read64(entryPos); +} + +void TPMIHandle::write64(size_t entryPos, uint64 val) +{ + assert(impl.get()); + impl->write64(entryPos, val); +} + + +} // namespace pcm diff --git a/src/tpmi.h b/src/tpmi.h new file mode 100644 index 00000000..d0e1d65f --- /dev/null +++ b/src/tpmi.h @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2023, Intel Corporation +// written by Roman Dementiev +// + +#pragma once + +/*! \file tpmi.h + \brief Interface to access TPMI registers + +*/ + +#include "mmio.h" +#include + +namespace pcm { + +class TPMIHandleInterface +{ +public: + virtual size_t getNumEntries() const = 0; + virtual uint64 read64(size_t entryPos) = 0; + virtual void write64(size_t entryPos, uint64 val) = 0; +}; + +class TPMIHandle : public TPMIHandleInterface +{ + TPMIHandle(const TPMIHandle&) = delete; + TPMIHandle& operator = (const TPMIHandle&) = delete; + std::shared_ptr impl; +public: + static size_t getNumInstances(); + static void setVerbose(const bool); + TPMIHandle(const size_t instance_, const size_t ID_, const size_t offset_, const bool readonly_ = true); + size_t getNumEntries() const override; + uint64 read64(size_t entryPos) override; + void write64(size_t entryPos, uint64 val) override; +}; + +} // namespace pcm diff --git a/src/types.h b/src/types.h index 3f911d2e..63a54b60 100644 --- a/src/types.h +++ b/src/types.h @@ -137,6 +137,14 @@ constexpr auto HSX_L2_RQSTS_REFERENCES_UMASK = 0xff; #define SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xD1) #define SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK (0x02) 
+// Crestmont on-core events + +constexpr auto CMT_MEM_LOAD_RETIRED_L2_MISS_EVTNR = 0xD1; +constexpr auto CMT_MEM_LOAD_RETIRED_L2_MISS_UMASK = 0x80; + +constexpr auto CMT_MEM_LOAD_RETIRED_L2_HIT_EVTNR = 0xD1; +constexpr auto CMT_MEM_LOAD_RETIRED_L2_HIT_UMASK = 0x02; + // architectural on-core events #define ARCH_LLC_REFERENCE_EVTNR (0x2E) @@ -632,6 +640,10 @@ struct BecktonUncorePMUCNTCTLRegister #define XPF_HA_PCI_PMON_CTR2_ADDR (0xA0 + 8*2) #define XPF_HA_PCI_PMON_CTR3_ADDR (0xA0 + 8*3) +constexpr auto BHS_PCIE_GEN5_PCI_PMON_BOX_CTL_ADDR = 0x620; +constexpr auto BHS_PCIE_GEN5_PCI_PMON_CTL0_ADDR = 0x630; +constexpr auto BHS_PCIE_GEN5_PCI_PMON_CTR0_ADDR = 0x650; + /** * XPF_ for Xeons: SNB, IVT, HSX, BDW, etc. * KNX_ for Xeon Phi (Knights *) processors @@ -703,6 +715,8 @@ struct BecktonUncorePMUCNTCTLRegister #define SERVER_MC_CH_PMON_FIXED_CTL_OFFSET (0x54) #define SERVER_MC_CH_PMON_FIXED_CTR_OFFSET (0x38) +constexpr auto BHS_MC_CH_PMON_BASE_ADDR = 0x024e800; + #define JKTIVT_QPI_PORT0_REGISTER_DEV_ADDR (8) #define JKTIVT_QPI_PORT0_REGISTER_FUNC_ADDR (2) #define JKTIVT_QPI_PORT1_REGISTER_DEV_ADDR (9) @@ -750,6 +764,25 @@ struct BecktonUncorePMUCNTCTLRegister #define SPR_QPI_PORT3_REGISTER_DEV_ADDR (4) #define SPR_QPI_PORT3_REGISTER_FUNC_ADDR (1) + +constexpr auto BHS_QPI_PORT0_REGISTER_DEV_ADDR = 16; +constexpr auto BHS_QPI_PORT0_REGISTER_FUNC_ADDR = 1; + +constexpr auto BHS_QPI_PORT1_REGISTER_DEV_ADDR = 17; +constexpr auto BHS_QPI_PORT1_REGISTER_FUNC_ADDR = 1; + +constexpr auto BHS_QPI_PORT2_REGISTER_DEV_ADDR = 18; +constexpr auto BHS_QPI_PORT2_REGISTER_FUNC_ADDR = 1; + +constexpr auto BHS_QPI_PORT3_REGISTER_DEV_ADDR = 19; +constexpr auto BHS_QPI_PORT3_REGISTER_FUNC_ADDR = 1; + +constexpr auto BHS_QPI_PORT4_REGISTER_DEV_ADDR = 20; +constexpr auto BHS_QPI_PORT4_REGISTER_FUNC_ADDR = 1; + +constexpr auto BHS_QPI_PORT5_REGISTER_DEV_ADDR = 21; +constexpr auto BHS_QPI_PORT5_REGISTER_FUNC_ADDR = 1; + #define QPI_PORT0_MISC_REGISTER_FUNC_ADDR (0) #define 
QPI_PORT1_MISC_REGISTER_FUNC_ADDR (0) #define QPI_PORT2_MISC_REGISTER_FUNC_ADDR (0) @@ -840,6 +873,47 @@ constexpr auto SERVER_HBM_M2M_14_REGISTER_FUNC_ADDR = 4; constexpr auto SERVER_HBM_M2M_15_REGISTER_DEV_ADDR = 15; constexpr auto SERVER_HBM_M2M_15_REGISTER_FUNC_ADDR = 4; + +// BHS B2CMI (M2M) +constexpr auto BHS_M2M_0_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_0_REGISTER_FUNC_ADDR = 1; +constexpr auto BHS_M2M_1_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_1_REGISTER_FUNC_ADDR = 2; +constexpr auto BHS_M2M_2_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_2_REGISTER_FUNC_ADDR = 3; +constexpr auto BHS_M2M_3_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_3_REGISTER_FUNC_ADDR = 4; +constexpr auto BHS_M2M_4_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_4_REGISTER_FUNC_ADDR = 5; +constexpr auto BHS_M2M_5_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_5_REGISTER_FUNC_ADDR = 6; +constexpr auto BHS_M2M_6_REGISTER_DEV_ADDR = 5; +constexpr auto BHS_M2M_6_REGISTER_FUNC_ADDR = 7; +constexpr auto BHS_M2M_7_REGISTER_DEV_ADDR = 6; +constexpr auto BHS_M2M_7_REGISTER_FUNC_ADDR = 1; +constexpr auto BHS_M2M_8_REGISTER_DEV_ADDR = 6; +constexpr auto BHS_M2M_8_REGISTER_FUNC_ADDR = 2; +constexpr auto BHS_M2M_9_REGISTER_DEV_ADDR = 6; +constexpr auto BHS_M2M_9_REGISTER_FUNC_ADDR = 3; +constexpr auto BHS_M2M_10_REGISTER_DEV_ADDR = 6; +constexpr auto BHS_M2M_10_REGISTER_FUNC_ADDR = 4; +constexpr auto BHS_M2M_11_REGISTER_DEV_ADDR = 6; +constexpr auto BHS_M2M_11_REGISTER_FUNC_ADDR = 5; + +// BHS B2UPI (M3UPI) +constexpr auto BHS_M3UPI_PORT0_REGISTER_DEV_ADDR = 24; +constexpr auto BHS_M3UPI_PORT1_REGISTER_DEV_ADDR = 25; +constexpr auto BHS_M3UPI_PORT2_REGISTER_DEV_ADDR = 26; +constexpr auto BHS_M3UPI_PORT3_REGISTER_DEV_ADDR = 27; +constexpr auto BHS_M3UPI_PORT4_REGISTER_DEV_ADDR = 28; +constexpr auto BHS_M3UPI_PORT5_REGISTER_DEV_ADDR = 29; +constexpr auto BHS_M3UPI_PORT0_REGISTER_FUNC_ADDR = 0; +constexpr auto BHS_M3UPI_PORT1_REGISTER_FUNC_ADDR = 0; +constexpr auto 
BHS_M3UPI_PORT2_REGISTER_FUNC_ADDR = 0; +constexpr auto BHS_M3UPI_PORT3_REGISTER_FUNC_ADDR = 0; +constexpr auto BHS_M3UPI_PORT4_REGISTER_FUNC_ADDR = 0; +constexpr auto BHS_M3UPI_PORT5_REGISTER_FUNC_ADDR = 0; + #define SKX_M2M_PCI_PMON_BOX_CTL_ADDR (0x258) #define SKX_M2M_PCI_PMON_CTL0_ADDR (0x228) @@ -886,6 +960,18 @@ constexpr auto ICX_M3UPI_PCI_PMON_CTR1_ADDR = (0xB0); constexpr auto ICX_M3UPI_PCI_PMON_CTR2_ADDR = (0xB8); constexpr auto ICX_M3UPI_PCI_PMON_CTR3_ADDR = (0xC0); +constexpr auto BHS_M3UPI_PCI_PMON_BOX_CTL_ADDR = (0x408); + +constexpr auto BHS_M3UPI_PCI_PMON_CTL0_ADDR = (0x430); +constexpr auto BHS_M3UPI_PCI_PMON_CTL1_ADDR = (0x438); +constexpr auto BHS_M3UPI_PCI_PMON_CTL2_ADDR = (0x440); +constexpr auto BHS_M3UPI_PCI_PMON_CTL3_ADDR = (0x448); + +constexpr auto BHS_M3UPI_PCI_PMON_CTR0_ADDR = (0x410); +constexpr auto BHS_M3UPI_PCI_PMON_CTR1_ADDR = (0x418); +constexpr auto BHS_M3UPI_PCI_PMON_CTR2_ADDR = (0x420); +constexpr auto BHS_M3UPI_PCI_PMON_CTR3_ADDR = (0x428); + constexpr auto MSR_UNCORE_PMON_GLOBAL_CTL = 0x700; constexpr auto IVT_MSR_UNCORE_PMON_GLOBAL_CTL = 0x0C00; @@ -952,6 +1038,16 @@ constexpr auto SPR_UBOX_MSR_PMON_CTL1_ADDR = 0x2FD3; constexpr auto SPR_UBOX_MSR_PMON_CTR0_ADDR = 0X2FD8; constexpr auto SPR_UBOX_MSR_PMON_CTR1_ADDR = 0X2FD9; +constexpr auto BHS_UCLK_FIXED_CTR_ADDR = 0x3FFD; +constexpr auto BHS_UCLK_FIXED_CTL_ADDR = 0x3FFE; +constexpr auto BHS_UBOX_MSR_PMON_BOX_CTL_ADDR = 0x3FF0; +constexpr auto BHS_UBOX_MSR_PMON_CTL0_ADDR = 0x3FF2; +constexpr auto BHS_UBOX_MSR_PMON_CTL1_ADDR = 0x3FF3; +constexpr auto BHS_UBOX_MSR_PMON_CTR0_ADDR = 0x3FF8; +constexpr auto BHS_UBOX_MSR_PMON_CTR1_ADDR = 0x3FF9; + + + constexpr auto JKTIVT_UCLK_FIXED_CTR_ADDR = (0x0C09); constexpr auto JKTIVT_UCLK_FIXED_CTL_ADDR = (0x0C08); constexpr auto JKTIVT_UBOX_MSR_PMON_CTL0_ADDR = (0x0C10); @@ -1180,6 +1276,29 @@ static const uint32 ICX_IIO_UNIT_CTL[] = { 0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00, 0x0B20 }; + +static const uint32 BHS_IRP_UNIT_CTL[] = { + 0x2A00, 
+ 0x2A10, + 0x2A20, + 0x2A30, + 0x2A40, + 0x2A50, + 0x2A60, + 0x2A70, + 0x2A80, + 0x2A90, + 0x2AA0, + 0x2AB0, + 0x2AC0, + 0x2AD0, + 0x2AE0, + 0x2AF0 +}; + +#define BHS_IRP_CTL_REG_OFFSET (0x0002) +#define BHS_IRP_CTR_REG_OFFSET (0x0008) + static const uint32 SPR_IRP_UNIT_CTL[] = { 0x3400, 0x3410, @@ -1246,6 +1365,12 @@ constexpr auto SPR_M2IOSF_IIO_CTL0 = 0x3002; constexpr auto SPR_M2IOSF_REG_STEP = 0x10; constexpr auto SPR_M2IOSF_NUM = 12; +constexpr auto BHS_M2IOSF_IIO_UNIT_CTL = 0x2900; +constexpr auto BHS_M2IOSF_IIO_CTR0 = 0x2908; +constexpr auto BHS_M2IOSF_IIO_CTL0 = 0x2902; +constexpr auto BHS_M2IOSF_REG_STEP = 0x10; +constexpr auto BHS_M2IOSF_NUM = 16; + constexpr auto CXL_PMON_SIZE = 0x1000; #define IIO_MSR_PMON_CTL_EVENT(x) ((x) << 0) diff --git a/src/uncore_pmu_discovery.h b/src/uncore_pmu_discovery.h index 284324df..9d9f089d 100644 --- a/src/uncore_pmu_discovery.h +++ b/src/uncore_pmu_discovery.h @@ -16,6 +16,9 @@ constexpr auto SPR_UPILL_BOX_TYPE = 8U; constexpr auto SPR_MDF_BOX_TYPE = 11U; constexpr auto SPR_CXLCM_BOX_TYPE = 12U; constexpr auto SPR_CXLDP_BOX_TYPE = 13U; +constexpr auto BHS_MDF_BOX_TYPE = 20U; +constexpr auto BHS_PCIE_GEN5x16_TYPE = 21U; +constexpr auto BHS_PCIE_GEN5x8_TYPE = 22U; class UncorePMUDiscovery { From f783dab2370bf26844f03b236af84f14a0154f40 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 14:31:46 +0200 Subject: [PATCH 03/30] support more cpus for localMemoryRequestRatioMetric Change-Id: Ic0ffc7c87311d9850a1dcfd92fd8cb512e93e927 --- src/cpucounters.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cpucounters.h b/src/cpucounters.h index 47bef3e4..851616a8 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2455,6 +2455,9 @@ class PCM_API PCM || cpu_model == PCM::BDX || cpu_model == PCM::SKX || cpu_model == PCM::ICX + || cpu_model == PCM::SPR + || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ; } From 55e8907ff4fae0274d6920898e98dfb8b63c000e Mon Sep 17 00:00:00 2001 From: "Dementiev, 
Roman" Date: Fri, 17 May 2024 14:42:15 +0200 Subject: [PATCH 04/30] refactoring and copyright year update Change-Id: Icff051d6f43253705c8c8cd4a6b2bb54d7e79240 --- src/cpucounters.cpp | 2 +- src/cpucounters.h | 2 +- src/pcm-accel.cpp | 2 +- src/tpmi.h | 2 +- src/types.h | 56 ++++++++++++++++++++++----------------------- 5 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 21e341d6..608f8038 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2009-2022, Intel Corporation +// Copyright (c) 2009-2024, Intel Corporation // written by Roman Dementiev // Otto Bruggeman // Thomas Willhalm diff --git a/src/cpucounters.h b/src/cpucounters.h index 851616a8..560d9cac 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2009-2022, Intel Corporation +// Copyright (c) 2009-2024, Intel Corporation // written by Roman Dementiev // Thomas Willhalm // and others diff --git a/src/pcm-accel.cpp b/src/pcm-accel.cpp index 8988589d..5f560114 100644 --- a/src/pcm-accel.cpp +++ b/src/pcm-accel.cpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2022, Intel Corporation +// Copyright (c) 2022-2024, Intel Corporation // written by White.Hu #include "pcm-accel-common.h" diff --git a/src/tpmi.h b/src/tpmi.h index d0e1d65f..dfee7197 100644 --- a/src/tpmi.h +++ b/src/tpmi.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // written by Roman Dementiev // diff --git a/src/types.h b/src/types.h index 63a54b60..fad5524f 100644 --- a/src/types.h +++ b/src/types.h @@ -1,5 +1,5 @@ // SPDX-License-Identifier: BSD-3-Clause -// Copyright (c) 2009-2022, Intel Corporation +// Copyright (c) 2009-2024, Intel Corporation // written by Roman Dementiev // @@ -39,43 +39,41 @@ 
typedef signed int int32; #define PCM_ULIMIT_RECOMMENDATION ("try executing 'ulimit -n 1000000' to increase the limit on the number of open files.\n") /* - MSR addresses from - "Intel 64 and IA-32 Architectures Software Developers Manual Volume 3B: - System Programming Guide, Part 2", Appendix A "PERFORMANCE-MONITORING EVENTS" + MSR addresses from + "Intel 64 and IA-32 Architectures Software Developers Manual Volume 3B: + System Programming Guide, Part 2", Appendix A "PERFORMANCE-MONITORING EVENTS" */ -#define INST_RETIRED_ADDR (0x309) -#define CPU_CLK_UNHALTED_THREAD_ADDR (0x30A) -#define CPU_CLK_UNHALTED_REF_ADDR (0x30B) -#define TOPDOWN_SLOTS_ADDR (0x30C) -#define PERF_METRICS_ADDR (0x329) -#define IA32_CR_PERF_GLOBAL_CTRL (0x38F) -#define IA32_CR_FIXED_CTR_CTRL (0x38D) -#define IA32_PERFEVTSEL0_ADDR (0x186) -#define IA32_PERFEVTSEL1_ADDR (IA32_PERFEVTSEL0_ADDR + 1) -#define IA32_PERFEVTSEL2_ADDR (IA32_PERFEVTSEL0_ADDR + 2) -#define IA32_PERFEVTSEL3_ADDR (IA32_PERFEVTSEL0_ADDR + 3) - +constexpr auto INST_RETIRED_ADDR = 0x309; +constexpr auto CPU_CLK_UNHALTED_THREAD_ADDR = 0x30A; +constexpr auto CPU_CLK_UNHALTED_REF_ADDR = 0x30B; +constexpr auto TOPDOWN_SLOTS_ADDR = 0x30C; +constexpr auto PERF_METRICS_ADDR = 0x329; +constexpr auto IA32_CR_PERF_GLOBAL_CTRL = 0x38F; +constexpr auto IA32_CR_FIXED_CTR_CTRL = 0x38D; +constexpr auto IA32_PERFEVTSEL0_ADDR = 0x186; +constexpr auto IA32_PERFEVTSEL1_ADDR = IA32_PERFEVTSEL0_ADDR + 1; +constexpr auto IA32_PERFEVTSEL2_ADDR = IA32_PERFEVTSEL0_ADDR + 2; +constexpr auto IA32_PERFEVTSEL3_ADDR = IA32_PERFEVTSEL0_ADDR + 3; constexpr auto IA32_PERF_GLOBAL_STATUS = 0x38E; constexpr auto IA32_PERF_GLOBAL_OVF_CTRL = 0x390; constexpr auto IA32_PEBS_ENABLE_ADDR = 0x3F1; -#define PERF_MAX_FIXED_COUNTERS (3) -#define PERF_MAX_CUSTOM_COUNTERS (8) -#define PERF_TOPDOWN_COUNTERS_L1 (5) -#define PERF_TOPDOWN_COUNTERS (PERF_TOPDOWN_COUNTERS_L1 + 4) -#define PERF_MAX_COUNTERS (PERF_MAX_FIXED_COUNTERS + PERF_MAX_CUSTOM_COUNTERS + 
PERF_TOPDOWN_COUNTERS) - -#define IA32_DEBUGCTL (0x1D9) +constexpr auto PERF_MAX_FIXED_COUNTERS = 3; +constexpr auto PERF_MAX_CUSTOM_COUNTERS = 8; +constexpr auto PERF_TOPDOWN_COUNTERS_L1 = 5; +constexpr auto PERF_TOPDOWN_COUNTERS = PERF_TOPDOWN_COUNTERS_L1 + 4; +constexpr auto PERF_MAX_COUNTERS = PERF_MAX_FIXED_COUNTERS + PERF_MAX_CUSTOM_COUNTERS + PERF_TOPDOWN_COUNTERS; -#define IA32_PMC0 (0xC1) -#define IA32_PMC1 (0xC1 + 1) -#define IA32_PMC2 (0xC1 + 2) -#define IA32_PMC3 (0xC1 + 3) +constexpr auto IA32_DEBUGCTL = 0x1D9; -#define MSR_OFFCORE_RSP0 (0x1A6) -#define MSR_OFFCORE_RSP1 (0x1A7) +constexpr auto IA32_PMC0 = 0xC1; +constexpr auto IA32_PMC1 = IA32_PMC0 + 1; +constexpr auto IA32_PMC2 = IA32_PMC0 + 2; +constexpr auto IA32_PMC3 = IA32_PMC0 + 3; +constexpr auto MSR_OFFCORE_RSP0 = 0x1A6; +constexpr auto MSR_OFFCORE_RSP1 = 0x1A7; constexpr auto MSR_LOAD_LATENCY = 0x3F6; constexpr auto MSR_FRONTEND = 0x3F7; From 25b737a71c558f20192e5116ef53a0c8e3e7f903 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 14:47:07 +0200 Subject: [PATCH 05/30] refactoring Change-Id: Ifa648f16f089446e33b30b390326b33fdcce74b8 --- src/types.h | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/types.h b/src/types.h index fad5524f..537c5323 100644 --- a/src/types.h +++ b/src/types.h @@ -78,42 +78,41 @@ constexpr auto MSR_LOAD_LATENCY = 0x3F6; constexpr auto MSR_FRONTEND = 0x3F7; /* From Table B-5. 
of the above mentioned document */ -#define PLATFORM_INFO_ADDR (0xCE) +constexpr auto PLATFORM_INFO_ADDR = 0xCE; -#define IA32_TIME_STAMP_COUNTER (0x10) +constexpr auto IA32_TIME_STAMP_COUNTER = 0x10; // Event IDs // Nehalem/Westmere on-core events -#define MEM_LOAD_RETIRED_L3_MISS_EVTNR (0xCB) -#define MEM_LOAD_RETIRED_L3_MISS_UMASK (0x10) +constexpr auto MEM_LOAD_RETIRED_L3_MISS_EVTNR = 0xCB; +constexpr auto MEM_LOAD_RETIRED_L3_MISS_UMASK = 0x10; -#define MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR (0xCB) -#define MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK (0x04) +constexpr auto MEM_LOAD_RETIRED_L3_UNSHAREDHIT_EVTNR = 0xCB; +constexpr auto MEM_LOAD_RETIRED_L3_UNSHAREDHIT_UMASK = 0x04; -#define MEM_LOAD_RETIRED_L2_HITM_EVTNR (0xCB) -#define MEM_LOAD_RETIRED_L2_HITM_UMASK (0x08) +constexpr auto MEM_LOAD_RETIRED_L2_HITM_EVTNR = 0xCB; +constexpr auto MEM_LOAD_RETIRED_L2_HITM_UMASK = 0x08; -#define MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xCB) -#define MEM_LOAD_RETIRED_L2_HIT_UMASK (0x02) +constexpr auto MEM_LOAD_RETIRED_L2_HIT_EVTNR = 0xCB; +constexpr auto MEM_LOAD_RETIRED_L2_HIT_UMASK = 0x02; // Sandy Bridge on-core events -#define MEM_LOAD_UOPS_MISC_RETIRED_LLC_MISS_EVTNR (0xD4) -#define MEM_LOAD_UOPS_MISC_RETIRED_LLC_MISS_UMASK (0x02) +constexpr auto MEM_LOAD_UOPS_MISC_RETIRED_LLC_MISS_EVTNR = 0xD4; +constexpr auto MEM_LOAD_UOPS_MISC_RETIRED_LLC_MISS_UMASK = 0x02; -#define MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_EVTNR (0xD2) -#define MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_UMASK (0x08) +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_EVTNR = 0xD2; +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE_UMASK = 0x08; -#define MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM_EVTNR (0xD2) -#define MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM_UMASK (0x04) +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM_EVTNR = 0xD2; +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM_UMASK = 0x04; -#define MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR (0xD2) -#define 
MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_UMASK (0x07) - -#define MEM_LOAD_UOPS_RETIRED_L2_HIT_EVTNR (0xD1) -#define MEM_LOAD_UOPS_RETIRED_L2_HIT_UMASK (0x02) +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_EVTNR = 0xD2; +constexpr auto MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_UMASK = 0x07; +constexpr auto MEM_LOAD_UOPS_RETIRED_L2_HIT_EVTNR = 0xD1; +constexpr auto MEM_LOAD_UOPS_RETIRED_L2_HIT_UMASK = 0x02; // Haswell on-core events constexpr auto HSX_L2_RQSTS_MISS_EVTNR = 0x24; From 2c97610ddd4b477d18b0fb65ab7396e378bf4c96 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:03:58 +0200 Subject: [PATCH 06/30] refactoring Change-Id: Idb5293444ab1947d49b6af3c61d4ce0806e7c35e --- src/types.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/types.h b/src/types.h index 537c5323..8a292bb2 100644 --- a/src/types.h +++ b/src/types.h @@ -143,13 +143,11 @@ constexpr auto CMT_MEM_LOAD_RETIRED_L2_HIT_EVTNR = 0xD1; constexpr auto CMT_MEM_LOAD_RETIRED_L2_HIT_UMASK = 0x02; // architectural on-core events +constexpr auto ARCH_LLC_REFERENCE_EVTNR = 0x2E; +constexpr auto ARCH_LLC_REFERENCE_UMASK = 0x4F; -#define ARCH_LLC_REFERENCE_EVTNR (0x2E) -#define ARCH_LLC_REFERENCE_UMASK (0x4F) - -#define ARCH_LLC_MISS_EVTNR (0x2E) -#define ARCH_LLC_MISS_UMASK (0x41) - +constexpr auto ARCH_LLC_MISS_EVTNR = 0x2E; +constexpr auto ARCH_LLC_MISS_UMASK = 0x41; // Atom on-core events #define ATOM_MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xCB) From b709d32ed14eda3d8cbfce902189689c764dcb61 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:05:30 +0200 Subject: [PATCH 07/30] refactoring Change-Id: I8fc6abe4dbd37dba42ab504caf45abd925f129e4 --- src/types.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/types.h b/src/types.h index 8a292bb2..7305924b 100644 --- a/src/types.h +++ b/src/types.h @@ -169,13 +169,12 @@ constexpr auto ARCH_LLC_MISS_UMASK = 0x41; #define ATOM_MEM_LOAD_RETIRED_L2_MISS_UMASK (0x02) // 
Offcore response events -#define OFFCORE_RESPONSE_0_EVTNR (0xB7) -#define OFFCORE_RESPONSE_1_EVTNR (0xBB) -#define GLC_OFFCORE_RESPONSE_0_EVTNR (0x2A) -#define GLC_OFFCORE_RESPONSE_1_EVTNR (0x2B) -#define OFFCORE_RESPONSE_0_UMASK (1) -#define OFFCORE_RESPONSE_1_UMASK (1) - +constexpr auto OFFCORE_RESPONSE_0_EVTNR = 0xB7; +constexpr auto OFFCORE_RESPONSE_1_EVTNR = 0xBB; +constexpr auto GLC_OFFCORE_RESPONSE_0_EVTNR = 0x2A; +constexpr auto GLC_OFFCORE_RESPONSE_1_EVTNR = 0x2B; +constexpr auto OFFCORE_RESPONSE_0_UMASK = 1; +constexpr auto OFFCORE_RESPONSE_1_UMASK = 1; constexpr auto LOAD_LATENCY_EVTNR = 0xcd; constexpr auto LOAD_LATENCY_UMASK = 0x01; From 2569257aed32bc477ea6d2904504c774b9ed3d1b Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:13:56 +0200 Subject: [PATCH 08/30] refactoring Change-Id: Id9bda613bd52e32963e8cf77f4d6e17534b8b49c --- src/types.h | 86 ++++++++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/src/types.h b/src/types.h index 7305924b..7b370646 100644 --- a/src/types.h +++ b/src/types.h @@ -148,25 +148,13 @@ constexpr auto ARCH_LLC_REFERENCE_UMASK = 0x4F; constexpr auto ARCH_LLC_MISS_EVTNR = 0x2E; constexpr auto ARCH_LLC_MISS_UMASK = 0x41; -// Atom on-core events - -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_UMASK (0x01) - -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_EVTNR (0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_UMASK (0x02) -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_UMASK (0x01) - -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_EVTNR (0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_UMASK (0x02) - -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_EVTNR (0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_HIT_UMASK (0x01) +// Atom on-core events +constexpr auto ATOM_MEM_LOAD_RETIRED_L2_HIT_EVTNR = 0xCB; +constexpr auto ATOM_MEM_LOAD_RETIRED_L2_HIT_UMASK = 0x01; -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_EVTNR 
(0xCB) -#define ATOM_MEM_LOAD_RETIRED_L2_MISS_UMASK (0x02) +constexpr auto ATOM_MEM_LOAD_RETIRED_L2_MISS_EVTNR = 0xCB; +constexpr auto ATOM_MEM_LOAD_RETIRED_L2_MISS_UMASK = 0x02; // Offcore response events constexpr auto OFFCORE_RESPONSE_0_EVTNR = 0xB7; @@ -186,45 +174,41 @@ constexpr auto FRONTEND_UMASK = 0x01; */ // Uncore msrs - -#define MSR_UNCORE_PERF_GLOBAL_CTRL_ADDR (0x391) - -#define MSR_UNCORE_PERFEVTSEL0_ADDR (0x3C0) -#define MSR_UNCORE_PERFEVTSEL1_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 1) -#define MSR_UNCORE_PERFEVTSEL2_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 2) -#define MSR_UNCORE_PERFEVTSEL3_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 3) -#define MSR_UNCORE_PERFEVTSEL4_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 4) -#define MSR_UNCORE_PERFEVTSEL5_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 5) -#define MSR_UNCORE_PERFEVTSEL6_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 6) -#define MSR_UNCORE_PERFEVTSEL7_ADDR (MSR_UNCORE_PERFEVTSEL0_ADDR + 7) - - -#define MSR_UNCORE_PMC0 (0x3B0) -#define MSR_UNCORE_PMC1 (MSR_UNCORE_PMC0 + 1) -#define MSR_UNCORE_PMC2 (MSR_UNCORE_PMC0 + 2) -#define MSR_UNCORE_PMC3 (MSR_UNCORE_PMC0 + 3) -#define MSR_UNCORE_PMC4 (MSR_UNCORE_PMC0 + 4) -#define MSR_UNCORE_PMC5 (MSR_UNCORE_PMC0 + 5) -#define MSR_UNCORE_PMC6 (MSR_UNCORE_PMC0 + 6) -#define MSR_UNCORE_PMC7 (MSR_UNCORE_PMC0 + 7) +constexpr auto MSR_UNCORE_PERF_GLOBAL_CTRL_ADDR = 0x391; + +constexpr auto MSR_UNCORE_PERFEVTSEL0_ADDR = 0x3C0; +constexpr auto MSR_UNCORE_PERFEVTSEL1_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 1; +constexpr auto MSR_UNCORE_PERFEVTSEL2_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 2; +constexpr auto MSR_UNCORE_PERFEVTSEL3_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 3; +constexpr auto MSR_UNCORE_PERFEVTSEL4_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 4; +constexpr auto MSR_UNCORE_PERFEVTSEL5_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 5; +constexpr auto MSR_UNCORE_PERFEVTSEL6_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 6; +constexpr auto MSR_UNCORE_PERFEVTSEL7_ADDR = MSR_UNCORE_PERFEVTSEL0_ADDR + 7; + +constexpr auto MSR_UNCORE_PMC0 = 
0x3B0; +constexpr auto MSR_UNCORE_PMC1 = MSR_UNCORE_PMC0 + 1; +constexpr auto MSR_UNCORE_PMC2 = MSR_UNCORE_PMC0 + 2; +constexpr auto MSR_UNCORE_PMC3 = MSR_UNCORE_PMC0 + 3; +constexpr auto MSR_UNCORE_PMC4 = MSR_UNCORE_PMC0 + 4; +constexpr auto MSR_UNCORE_PMC5 = MSR_UNCORE_PMC0 + 5; +constexpr auto MSR_UNCORE_PMC6 = MSR_UNCORE_PMC0 + 6; +constexpr auto MSR_UNCORE_PMC7 = MSR_UNCORE_PMC0 + 7; // Uncore event IDs +constexpr auto UNC_QMC_WRITES_FULL_ANY_EVTNR = 0x2F; +constexpr auto UNC_QMC_WRITES_FULL_ANY_UMASK = 0x07; -#define UNC_QMC_WRITES_FULL_ANY_EVTNR (0x2F) -#define UNC_QMC_WRITES_FULL_ANY_UMASK (0x07) - -#define UNC_QMC_NORMAL_READS_ANY_EVTNR (0x2C) -#define UNC_QMC_NORMAL_READS_ANY_UMASK (0x07) - -#define UNC_QHL_REQUESTS_EVTNR (0x20) +constexpr auto UNC_QMC_NORMAL_READS_ANY_EVTNR = 0x2C; +constexpr auto UNC_QMC_NORMAL_READS_ANY_UMASK = 0x07; -#define UNC_QHL_REQUESTS_IOH_READS_UMASK (0x01) -#define UNC_QHL_REQUESTS_IOH_WRITES_UMASK (0x02) -#define UNC_QHL_REQUESTS_REMOTE_READS_UMASK (0x04) -#define UNC_QHL_REQUESTS_REMOTE_WRITES_UMASK (0x08) -#define UNC_QHL_REQUESTS_LOCAL_READS_UMASK (0x10) -#define UNC_QHL_REQUESTS_LOCAL_WRITES_UMASK (0x20) +constexpr auto UNC_QHL_REQUESTS_EVTNR = 0x20; +constexpr auto UNC_QHL_REQUESTS_IOH_READS_UMASK = 0x01; +constexpr auto UNC_QHL_REQUESTS_IOH_WRITES_UMASK = 0x02; +constexpr auto UNC_QHL_REQUESTS_REMOTE_READS_UMASK = 0x04; +constexpr auto UNC_QHL_REQUESTS_REMOTE_WRITES_UMASK = 0x08; +constexpr auto UNC_QHL_REQUESTS_LOCAL_READS_UMASK = 0x10; +constexpr auto UNC_QHL_REQUESTS_LOCAL_WRITES_UMASK = 0x20; /* From "Intel(r) Xeon(r) Processor 7500 Series Uncore Programming Guide" */ From 0c38aec00c515b9fa50d257c6c7026d9cdcb1aa5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:19:43 +0200 Subject: [PATCH 09/30] refactoring Change-Id: Ie0b6e5dba8586aec9cb9ceab43495a1d09d3f4b0 --- src/types.h | 166 +++++++++++++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 85 deletions(-) diff 
--git a/src/types.h b/src/types.h index 7b370646..6d3d3483 100644 --- a/src/types.h +++ b/src/types.h @@ -193,7 +193,6 @@ constexpr auto MSR_UNCORE_PMC4 = MSR_UNCORE_PMC0 + 4; constexpr auto MSR_UNCORE_PMC5 = MSR_UNCORE_PMC0 + 5; constexpr auto MSR_UNCORE_PMC6 = MSR_UNCORE_PMC0 + 6; constexpr auto MSR_UNCORE_PMC7 = MSR_UNCORE_PMC0 + 7; - // Uncore event IDs constexpr auto UNC_QMC_WRITES_FULL_ANY_EVTNR = 0x2F; constexpr auto UNC_QMC_WRITES_FULL_ANY_UMASK = 0x07; @@ -214,90 +213,87 @@ constexpr auto UNC_QHL_REQUESTS_LOCAL_WRITES_UMASK = 0x20; */ // Beckton uncore event IDs - -#define U_MSR_PMON_GLOBAL_CTL (0x0C00) - -#define MB0_MSR_PERF_GLOBAL_CTL (0x0CA0) -#define MB0_MSR_PMU_CNT_0 (0x0CB1) -#define MB0_MSR_PMU_CNT_CTL_0 (0x0CB0) -#define MB0_MSR_PMU_CNT_1 (0x0CB3) -#define MB0_MSR_PMU_CNT_CTL_1 (0x0CB2) -#define MB0_MSR_PMU_ZDP_CTL_FVC (0x0CAB) - - -#define MB1_MSR_PERF_GLOBAL_CTL (0x0CE0) -#define MB1_MSR_PMU_CNT_0 (0x0CF1) -#define MB1_MSR_PMU_CNT_CTL_0 (0x0CF0) -#define MB1_MSR_PMU_CNT_1 (0x0CF3) -#define MB1_MSR_PMU_CNT_CTL_1 (0x0CF2) -#define MB1_MSR_PMU_ZDP_CTL_FVC (0x0CEB) - -#define BB0_MSR_PERF_GLOBAL_CTL (0x0C20) -#define BB0_MSR_PERF_CNT_1 (0x0C33) -#define BB0_MSR_PERF_CNT_CTL_1 (0x0C32) - -#define BB1_MSR_PERF_GLOBAL_CTL (0x0C60) -#define BB1_MSR_PERF_CNT_1 (0x0C73) -#define BB1_MSR_PERF_CNT_CTL_1 (0x0C72) - -#define R_MSR_PMON_CTL0 (0x0E10) -#define R_MSR_PMON_CTR0 (0x0E11) -#define R_MSR_PMON_CTL1 (0x0E12) -#define R_MSR_PMON_CTR1 (0x0E13) -#define R_MSR_PMON_CTL2 (0x0E14) -#define R_MSR_PMON_CTR2 (0x0E15) -#define R_MSR_PMON_CTL3 (0x0E16) -#define R_MSR_PMON_CTR3 (0x0E17) -#define R_MSR_PMON_CTL4 (0x0E18) -#define R_MSR_PMON_CTR4 (0x0E19) -#define R_MSR_PMON_CTL5 (0x0E1A) -#define R_MSR_PMON_CTR5 (0x0E1B) -#define R_MSR_PMON_CTL6 (0x0E1C) -#define R_MSR_PMON_CTR6 (0x0E1D) -#define R_MSR_PMON_CTL7 (0x0E1E) -#define R_MSR_PMON_CTR7 (0x0E1F) -#define R_MSR_PMON_CTL8 (0x0E30) -#define R_MSR_PMON_CTR8 (0x0E31) -#define R_MSR_PMON_CTL9 (0x0E32) -#define 
R_MSR_PMON_CTR9 (0x0E33) -#define R_MSR_PMON_CTL10 (0x0E34) -#define R_MSR_PMON_CTR10 (0x0E35) -#define R_MSR_PMON_CTL11 (0x0E36) -#define R_MSR_PMON_CTR11 (0x0E37) -#define R_MSR_PMON_CTL12 (0x0E38) -#define R_MSR_PMON_CTR12 (0x0E39) -#define R_MSR_PMON_CTL13 (0x0E3A) -#define R_MSR_PMON_CTR13 (0x0E3B) -#define R_MSR_PMON_CTL14 (0x0E3C) -#define R_MSR_PMON_CTR14 (0x0E3D) -#define R_MSR_PMON_CTL15 (0x0E3E) -#define R_MSR_PMON_CTR15 (0x0E3F) - -#define R_MSR_PORT0_IPERF_CFG0 (0x0E04) -#define R_MSR_PORT1_IPERF_CFG0 (0x0E05) -#define R_MSR_PORT2_IPERF_CFG0 (0x0E06) -#define R_MSR_PORT3_IPERF_CFG0 (0x0E07) -#define R_MSR_PORT4_IPERF_CFG0 (0x0E08) -#define R_MSR_PORT5_IPERF_CFG0 (0x0E09) -#define R_MSR_PORT6_IPERF_CFG0 (0x0E0A) -#define R_MSR_PORT7_IPERF_CFG0 (0x0E0B) - -#define R_MSR_PORT0_IPERF_CFG1 (0x0E24) -#define R_MSR_PORT1_IPERF_CFG1 (0x0E25) -#define R_MSR_PORT2_IPERF_CFG1 (0x0E26) -#define R_MSR_PORT3_IPERF_CFG1 (0x0E27) -#define R_MSR_PORT4_IPERF_CFG1 (0x0E28) -#define R_MSR_PORT5_IPERF_CFG1 (0x0E29) -#define R_MSR_PORT6_IPERF_CFG1 (0x0E2A) -#define R_MSR_PORT7_IPERF_CFG1 (0x0E2B) - -#define R_MSR_PMON_GLOBAL_CTL_7_0 (0x0E00) -#define R_MSR_PMON_GLOBAL_CTL_15_8 (0x0E20) - -#define W_MSR_PMON_GLOBAL_CTL (0xC80) -#define W_MSR_PMON_FIXED_CTR_CTL (0x395) -#define W_MSR_PMON_FIXED_CTR (0x394) - +constexpr auto U_MSR_PMON_GLOBAL_CTL = 0x0C00; + +constexpr auto MB0_MSR_PERF_GLOBAL_CTL = 0x0CA0; +constexpr auto MB0_MSR_PMU_CNT_0 = 0x0CB1; +constexpr auto MB0_MSR_PMU_CNT_CTL_0 = 0x0CB0; +constexpr auto MB0_MSR_PMU_CNT_1 = 0x0CB3; +constexpr auto MB0_MSR_PMU_CNT_CTL_1 = 0x0CB2; +constexpr auto MB0_MSR_PMU_ZDP_CTL_FVC = 0x0CAB; + +constexpr auto MB1_MSR_PERF_GLOBAL_CTL = 0x0CE0; +constexpr auto MB1_MSR_PMU_CNT_0 = 0x0CF1; +constexpr auto MB1_MSR_PMU_CNT_CTL_0 = 0x0CF0; +constexpr auto MB1_MSR_PMU_CNT_1 = 0x0CF3; +constexpr auto MB1_MSR_PMU_CNT_CTL_1 = 0x0CF2; +constexpr auto MB1_MSR_PMU_ZDP_CTL_FVC = 0x0CEB; + +constexpr auto BB0_MSR_PERF_GLOBAL_CTL = 0x0C20; 
+constexpr auto BB0_MSR_PERF_CNT_1 = 0x0C33; +constexpr auto BB0_MSR_PERF_CNT_CTL_1 = 0x0C32; + +constexpr auto BB1_MSR_PERF_GLOBAL_CTL = 0x0C60; +constexpr auto BB1_MSR_PERF_CNT_1 = 0x0C73; +constexpr auto BB1_MSR_PERF_CNT_CTL_1 = 0x0C72; + +constexpr auto R_MSR_PMON_CTL0 = 0x0E10; +constexpr auto R_MSR_PMON_CTR0 = 0x0E11; +constexpr auto R_MSR_PMON_CTL1 = 0x0E12; +constexpr auto R_MSR_PMON_CTR1 = 0x0E13; +constexpr auto R_MSR_PMON_CTL2 = 0x0E14; +constexpr auto R_MSR_PMON_CTR2 = 0x0E15; +constexpr auto R_MSR_PMON_CTL3 = 0x0E16; +constexpr auto R_MSR_PMON_CTR3 = 0x0E17; +constexpr auto R_MSR_PMON_CTL4 = 0x0E18; +constexpr auto R_MSR_PMON_CTR4 = 0x0E19; +constexpr auto R_MSR_PMON_CTL5 = 0x0E1A; +constexpr auto R_MSR_PMON_CTR5 = 0x0E1B; +constexpr auto R_MSR_PMON_CTL6 = 0x0E1C; +constexpr auto R_MSR_PMON_CTR6 = 0x0E1D; +constexpr auto R_MSR_PMON_CTL7 = 0x0E1E; +constexpr auto R_MSR_PMON_CTR7 = 0x0E1F; +constexpr auto R_MSR_PMON_CTL8 = 0x0E30; +constexpr auto R_MSR_PMON_CTR8 = 0x0E31; +constexpr auto R_MSR_PMON_CTL9 = 0x0E32; +constexpr auto R_MSR_PMON_CTR9 = 0x0E33; +constexpr auto R_MSR_PMON_CTL10 = 0x0E34; +constexpr auto R_MSR_PMON_CTR10 = 0x0E35; +constexpr auto R_MSR_PMON_CTL11 = 0x0E36; +constexpr auto R_MSR_PMON_CTR11 = 0x0E37; +constexpr auto R_MSR_PMON_CTL12 = 0x0E38; +constexpr auto R_MSR_PMON_CTR12 = 0x0E39; +constexpr auto R_MSR_PMON_CTL13 = 0x0E3A; +constexpr auto R_MSR_PMON_CTR13 = 0x0E3B; +constexpr auto R_MSR_PMON_CTL14 = 0x0E3C; +constexpr auto R_MSR_PMON_CTR14 = 0x0E3D; +constexpr auto R_MSR_PMON_CTL15 = 0x0E3E; +constexpr auto R_MSR_PMON_CTR15 = 0x0E3F; + +constexpr auto R_MSR_PORT0_IPERF_CFG0 = 0x0E04; +constexpr auto R_MSR_PORT1_IPERF_CFG0 = 0x0E05; +constexpr auto R_MSR_PORT2_IPERF_CFG0 = 0x0E06; +constexpr auto R_MSR_PORT3_IPERF_CFG0 = 0x0E07; +constexpr auto R_MSR_PORT4_IPERF_CFG0 = 0x0E08; +constexpr auto R_MSR_PORT5_IPERF_CFG0 = 0x0E09; +constexpr auto R_MSR_PORT6_IPERF_CFG0 = 0x0E0A; +constexpr auto R_MSR_PORT7_IPERF_CFG0 = 0x0E0B; + 
+constexpr auto R_MSR_PORT0_IPERF_CFG1 = 0x0E24; +constexpr auto R_MSR_PORT1_IPERF_CFG1 = 0x0E25; +constexpr auto R_MSR_PORT2_IPERF_CFG1 = 0x0E26; +constexpr auto R_MSR_PORT3_IPERF_CFG1 = 0x0E27; +constexpr auto R_MSR_PORT4_IPERF_CFG1 = 0x0E28; +constexpr auto R_MSR_PORT5_IPERF_CFG1 = 0x0E29; +constexpr auto R_MSR_PORT6_IPERF_CFG1 = 0x0E2A; +constexpr auto R_MSR_PORT7_IPERF_CFG1 = 0x0E2B; + +constexpr auto R_MSR_PMON_GLOBAL_CTL_7_0 = 0x0E00; +constexpr auto R_MSR_PMON_GLOBAL_CTL_15_8 = 0x0E20; + +constexpr auto W_MSR_PMON_GLOBAL_CTL = 0xC80; +constexpr auto W_MSR_PMON_FIXED_CTR_CTL = 0x395; +constexpr auto W_MSR_PMON_FIXED_CTR = 0x394; /* * Platform QoS MSRs */ From 222043ff4a45bfe187f3e066137e6d5d6ed03968 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:22:44 +0200 Subject: [PATCH 10/30] refactor Change-Id: I08359604fcfc0354345d7aa5e1b18fc3d9ab9e00 --- src/types.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/types.h b/src/types.h index 6d3d3483..c6c7076a 100644 --- a/src/types.h +++ b/src/types.h @@ -21,6 +21,7 @@ #include #include #include +#include <limits> #ifdef _MSC_VER #include @@ -298,11 +299,11 @@ constexpr auto W_MSR_PMON_FIXED_CTR = 0x394; * Platform QoS MSRs */ -#define IA32_PQR_ASSOC (0xc8f) -#define IA32_QM_EVTSEL (0xc8d) -#define IA32_QM_CTR (0xc8e) +constexpr auto IA32_PQR_ASSOC = 0xc8f; +constexpr auto IA32_QM_EVTSEL = 0xc8d; +constexpr auto IA32_QM_CTR = 0xc8e; -#define PCM_INVALID_QOS_MONITORING_DATA ((std::numeric_limits<uint64>::max)()) +constexpr auto PCM_INVALID_QOS_MONITORING_DATA = (std::numeric_limits<uint64>::max)(); /* \brief Event Select Register format From 1caae02e1ca1b19118a2b3eab388b90e187be495 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:28:46 +0200 Subject: [PATCH 11/30] refactor Change-Id: I726ab4265ab610895b168362743655d66ace8fe5 --- src/types.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/types.h
b/src/types.h index c6c7076a..41562b9c 100644 --- a/src/types.h +++ b/src/types.h @@ -1449,32 +1449,32 @@ struct ICX_IIOPMUCNTCTLRegister ICX_IIOPMUCNTCTLRegister() : value(0) { } }; -#define MSR_PACKAGE_THERM_STATUS (0x01B1) -#define MSR_IA32_THERM_STATUS (0x019C) -#define PCM_INVALID_THERMAL_HEADROOM ((std::numeric_limits<int32>::min)()) +constexpr auto MSR_PACKAGE_THERM_STATUS = 0x01B1; +constexpr auto MSR_IA32_THERM_STATUS = 0x019C; +constexpr auto PCM_INVALID_THERMAL_HEADROOM = (std::numeric_limits<int32>::min)(); -#define MSR_IA32_BIOS_SIGN_ID (0x8B) +constexpr auto MSR_IA32_BIOS_SIGN_ID = 0x8B; -#define MSR_DRAM_ENERGY_STATUS (0x0619) +constexpr auto MSR_DRAM_ENERGY_STATUS = 0x0619; constexpr auto MSR_PP0_ENERGY_STATUS = 0x639; constexpr auto MSR_PP1_ENERGY_STATUS = 0x641; -#define MSR_PKG_C2_RESIDENCY (0x60D) -#define MSR_PKG_C3_RESIDENCY (0x3F8) -#define MSR_PKG_C6_RESIDENCY (0x3F9) -#define MSR_PKG_C7_RESIDENCY (0x3FA) -#define MSR_CORE_C3_RESIDENCY (0x3FC) -#define MSR_CORE_C6_RESIDENCY (0x3FD) -#define MSR_CORE_C7_RESIDENCY (0x3FE) +constexpr auto MSR_PKG_C2_RESIDENCY = 0x60D; +constexpr auto MSR_PKG_C3_RESIDENCY = 0x3F8; +constexpr auto MSR_PKG_C6_RESIDENCY = 0x3F9; +constexpr auto MSR_PKG_C7_RESIDENCY = 0x3FA; +constexpr auto MSR_CORE_C3_RESIDENCY = 0x3FC; +constexpr auto MSR_CORE_C6_RESIDENCY = 0x3FD; +constexpr auto MSR_CORE_C7_RESIDENCY = 0x3FE; -#define MSR_PERF_GLOBAL_INUSE (0x392) +constexpr auto MSR_PERF_GLOBAL_INUSE = 0x392; -#define MSR_IA32_SPEC_CTRL (0x48) -#define MSR_IA32_ARCH_CAPABILITIES (0x10A) +constexpr auto MSR_IA32_SPEC_CTRL = 0x48; +constexpr auto MSR_IA32_ARCH_CAPABILITIES = 0x10A; -#define MSR_TSX_FORCE_ABORT (0x10f) +constexpr auto MSR_TSX_FORCE_ABORT = 0x10f; -#define MSR_PERF_CAPABILITIES (0x345) +constexpr auto MSR_PERF_CAPABILITIES = 0x345; // data structure for converting two uint32s <-> uin64 union cvt_ds From 7923732698a3d1da58df9df455c56d6ba7314946 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 May 2024 15:43:14
+0200 Subject: [PATCH 12/30] refactor Change-Id: I8f355feffc79d47e733ecec3eef5259eb8babe51 --- src/types.h | 564 ++++++++++++++++++++++++++-------------------------- 1 file changed, 279 insertions(+), 285 deletions(-) diff --git a/src/types.h b/src/types.h index 41562b9c..8e93ff12 100644 --- a/src/types.h +++ b/src/types.h @@ -498,123 +498,121 @@ struct BecktonUncorePMUCNTCTLRegister }; }; -#define MSR_SMI_COUNT (0x34) +constexpr auto MSR_SMI_COUNT = 0x34; /* \brief Sandy Bridge energy counters */ -#define MSR_PKG_ENERGY_STATUS (0x611) -#define MSR_RAPL_POWER_UNIT (0x606) -#define MSR_PKG_POWER_INFO (0x614) +constexpr auto MSR_PKG_ENERGY_STATUS = 0x611; +constexpr auto MSR_RAPL_POWER_UNIT = 0x606; +constexpr auto MSR_PKG_POWER_INFO = 0x614; -#define PCM_INTEL_PCI_VENDOR_ID (0x8086) -#define PCM_PCI_VENDOR_ID_OFFSET (0) +constexpr auto PCM_INTEL_PCI_VENDOR_ID = 0x8086; +constexpr auto PCM_PCI_VENDOR_ID_OFFSET = 0; // server PCICFG uncore counters - -#define JKTIVT_MC0_CH0_REGISTER_DEV_ADDR (16) -#define JKTIVT_MC0_CH1_REGISTER_DEV_ADDR (16) -#define JKTIVT_MC0_CH2_REGISTER_DEV_ADDR (16) -#define JKTIVT_MC0_CH3_REGISTER_DEV_ADDR (16) -#define JKTIVT_MC0_CH0_REGISTER_FUNC_ADDR (4) -#define JKTIVT_MC0_CH1_REGISTER_FUNC_ADDR (5) -#define JKTIVT_MC0_CH2_REGISTER_FUNC_ADDR (0) -#define JKTIVT_MC0_CH3_REGISTER_FUNC_ADDR (1) - -#define JKTIVT_MC1_CH0_REGISTER_DEV_ADDR (30) -#define JKTIVT_MC1_CH1_REGISTER_DEV_ADDR (30) -#define JKTIVT_MC1_CH2_REGISTER_DEV_ADDR (30) -#define JKTIVT_MC1_CH3_REGISTER_DEV_ADDR (30) -#define JKTIVT_MC1_CH0_REGISTER_FUNC_ADDR (4) -#define JKTIVT_MC1_CH1_REGISTER_FUNC_ADDR (5) -#define JKTIVT_MC1_CH2_REGISTER_FUNC_ADDR (0) -#define JKTIVT_MC1_CH3_REGISTER_FUNC_ADDR (1) - -#define HSX_MC0_CH0_REGISTER_DEV_ADDR (20) -#define HSX_MC0_CH1_REGISTER_DEV_ADDR (20) -#define HSX_MC0_CH2_REGISTER_DEV_ADDR (21) -#define HSX_MC0_CH3_REGISTER_DEV_ADDR (21) -#define HSX_MC0_CH0_REGISTER_FUNC_ADDR (0) -#define HSX_MC0_CH1_REGISTER_FUNC_ADDR (1) -#define 
HSX_MC0_CH2_REGISTER_FUNC_ADDR (0) -#define HSX_MC0_CH3_REGISTER_FUNC_ADDR (1) - -#define HSX_MC1_CH0_REGISTER_DEV_ADDR (23) -#define HSX_MC1_CH1_REGISTER_DEV_ADDR (23) -#define HSX_MC1_CH2_REGISTER_DEV_ADDR (24) -#define HSX_MC1_CH3_REGISTER_DEV_ADDR (24) -#define HSX_MC1_CH0_REGISTER_FUNC_ADDR (0) -#define HSX_MC1_CH1_REGISTER_FUNC_ADDR (1) -#define HSX_MC1_CH2_REGISTER_FUNC_ADDR (0) -#define HSX_MC1_CH3_REGISTER_FUNC_ADDR (1) - -#define KNL_MC0_CH0_REGISTER_DEV_ADDR (8) -#define KNL_MC0_CH1_REGISTER_DEV_ADDR (8) -#define KNL_MC0_CH2_REGISTER_DEV_ADDR (8) -#define KNL_MC0_CH0_REGISTER_FUNC_ADDR (2) -#define KNL_MC0_CH1_REGISTER_FUNC_ADDR (3) -#define KNL_MC0_CH2_REGISTER_FUNC_ADDR (4) - -#define SKX_MC0_CH0_REGISTER_DEV_ADDR (10) -#define SKX_MC0_CH1_REGISTER_DEV_ADDR (10) -#define SKX_MC0_CH2_REGISTER_DEV_ADDR (11) -#define SKX_MC0_CH3_REGISTER_DEV_ADDR (-1) //Does not exist -#define SKX_MC0_CH0_REGISTER_FUNC_ADDR (2) -#define SKX_MC0_CH1_REGISTER_FUNC_ADDR (6) -#define SKX_MC0_CH2_REGISTER_FUNC_ADDR (2) -#define SKX_MC0_CH3_REGISTER_FUNC_ADDR (-1) //Does not exist - -#define SKX_MC1_CH0_REGISTER_DEV_ADDR (12) -#define SKX_MC1_CH1_REGISTER_DEV_ADDR (12) -#define SKX_MC1_CH2_REGISTER_DEV_ADDR (13) -#define SKX_MC1_CH3_REGISTER_DEV_ADDR (-1) //Does not exist -#define SKX_MC1_CH0_REGISTER_FUNC_ADDR (2) -#define SKX_MC1_CH1_REGISTER_FUNC_ADDR (6) -#define SKX_MC1_CH2_REGISTER_FUNC_ADDR (2) -#define SKX_MC1_CH3_REGISTER_FUNC_ADDR (-1) //Does not exist - -#define SERVER_UBOX0_REGISTER_DEV_ADDR (0) -#define SERVER_UBOX0_REGISTER_FUNC_ADDR (1) - -#define KNL_MC1_CH0_REGISTER_DEV_ADDR (9) -#define KNL_MC1_CH1_REGISTER_DEV_ADDR (9) -#define KNL_MC1_CH2_REGISTER_DEV_ADDR (9) -#define KNL_MC1_CH0_REGISTER_FUNC_ADDR (2) -#define KNL_MC1_CH1_REGISTER_FUNC_ADDR (3) -#define KNL_MC1_CH2_REGISTER_FUNC_ADDR (4) - -#define KNL_EDC0_ECLK_REGISTER_DEV_ADDR (24) -#define KNL_EDC0_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC1_ECLK_REGISTER_DEV_ADDR (25) -#define 
KNL_EDC1_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC2_ECLK_REGISTER_DEV_ADDR (26) -#define KNL_EDC2_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC3_ECLK_REGISTER_DEV_ADDR (27) -#define KNL_EDC3_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC4_ECLK_REGISTER_DEV_ADDR (28) -#define KNL_EDC4_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC5_ECLK_REGISTER_DEV_ADDR (29) -#define KNL_EDC5_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC6_ECLK_REGISTER_DEV_ADDR (30) -#define KNL_EDC6_ECLK_REGISTER_FUNC_ADDR (2) -#define KNL_EDC7_ECLK_REGISTER_DEV_ADDR (31) -#define KNL_EDC7_ECLK_REGISTER_FUNC_ADDR (2) - -#define HSX_HA0_REGISTER_DEV_ADDR (18) -#define HSX_HA0_REGISTER_FUNC_ADDR (1) -#define HSX_HA1_REGISTER_DEV_ADDR (18) -#define HSX_HA1_REGISTER_FUNC_ADDR (5) - -#define XPF_HA_PCI_PMON_BOX_CTL_ADDR (0xF4) -#define XPF_HA_PCI_PMON_CTL0_ADDR (0xD8 + 4*0) -#define XPF_HA_PCI_PMON_CTL1_ADDR (0xD8 + 4*1) -#define XPF_HA_PCI_PMON_CTL2_ADDR (0xD8 + 4*2) -#define XPF_HA_PCI_PMON_CTL3_ADDR (0xD8 + 4*3) -#define XPF_HA_PCI_PMON_CTR0_ADDR (0xA0 + 8*0) -#define XPF_HA_PCI_PMON_CTR1_ADDR (0xA0 + 8*1) -#define XPF_HA_PCI_PMON_CTR2_ADDR (0xA0 + 8*2) -#define XPF_HA_PCI_PMON_CTR3_ADDR (0xA0 + 8*3) - +constexpr auto JKTIVT_MC0_CH0_REGISTER_DEV_ADDR = 16; +constexpr auto JKTIVT_MC0_CH1_REGISTER_DEV_ADDR = 16; +constexpr auto JKTIVT_MC0_CH2_REGISTER_DEV_ADDR = 16; +constexpr auto JKTIVT_MC0_CH3_REGISTER_DEV_ADDR = 16; +constexpr auto JKTIVT_MC0_CH0_REGISTER_FUNC_ADDR = 4; +constexpr auto JKTIVT_MC0_CH1_REGISTER_FUNC_ADDR = 5; +constexpr auto JKTIVT_MC0_CH2_REGISTER_FUNC_ADDR = 0; +constexpr auto JKTIVT_MC0_CH3_REGISTER_FUNC_ADDR = 1; + +constexpr auto JKTIVT_MC1_CH0_REGISTER_DEV_ADDR = 30; +constexpr auto JKTIVT_MC1_CH1_REGISTER_DEV_ADDR = 30; +constexpr auto JKTIVT_MC1_CH2_REGISTER_DEV_ADDR = 30; +constexpr auto JKTIVT_MC1_CH3_REGISTER_DEV_ADDR = 30; +constexpr auto JKTIVT_MC1_CH0_REGISTER_FUNC_ADDR = 4; +constexpr auto JKTIVT_MC1_CH1_REGISTER_FUNC_ADDR = 5; +constexpr auto 
JKTIVT_MC1_CH2_REGISTER_FUNC_ADDR = 0; +constexpr auto JKTIVT_MC1_CH3_REGISTER_FUNC_ADDR = 1; + +constexpr auto HSX_MC0_CH0_REGISTER_DEV_ADDR = 20; +constexpr auto HSX_MC0_CH1_REGISTER_DEV_ADDR = 20; +constexpr auto HSX_MC0_CH2_REGISTER_DEV_ADDR = 21; +constexpr auto HSX_MC0_CH3_REGISTER_DEV_ADDR = 21; +constexpr auto HSX_MC0_CH0_REGISTER_FUNC_ADDR = 0; +constexpr auto HSX_MC0_CH1_REGISTER_FUNC_ADDR = 1; +constexpr auto HSX_MC0_CH2_REGISTER_FUNC_ADDR = 0; +constexpr auto HSX_MC0_CH3_REGISTER_FUNC_ADDR = 1; + +constexpr auto HSX_MC1_CH0_REGISTER_DEV_ADDR = 23; +constexpr auto HSX_MC1_CH1_REGISTER_DEV_ADDR = 23; +constexpr auto HSX_MC1_CH2_REGISTER_DEV_ADDR = 24; +constexpr auto HSX_MC1_CH3_REGISTER_DEV_ADDR = 24; +constexpr auto HSX_MC1_CH0_REGISTER_FUNC_ADDR = 0; +constexpr auto HSX_MC1_CH1_REGISTER_FUNC_ADDR = 1; +constexpr auto HSX_MC1_CH2_REGISTER_FUNC_ADDR = 0; +constexpr auto HSX_MC1_CH3_REGISTER_FUNC_ADDR = 1; + +constexpr auto KNL_MC0_CH0_REGISTER_DEV_ADDR = 8; +constexpr auto KNL_MC0_CH1_REGISTER_DEV_ADDR = 8; +constexpr auto KNL_MC0_CH2_REGISTER_DEV_ADDR = 8; +constexpr auto KNL_MC0_CH0_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_MC0_CH1_REGISTER_FUNC_ADDR = 3; +constexpr auto KNL_MC0_CH2_REGISTER_FUNC_ADDR = 4; + +constexpr auto SKX_MC0_CH0_REGISTER_DEV_ADDR = 10; +constexpr auto SKX_MC0_CH1_REGISTER_DEV_ADDR = 10; +constexpr auto SKX_MC0_CH2_REGISTER_DEV_ADDR = 11; +constexpr auto SKX_MC0_CH3_REGISTER_DEV_ADDR = -1; //Does not exist +constexpr auto SKX_MC0_CH0_REGISTER_FUNC_ADDR = 2; +constexpr auto SKX_MC0_CH1_REGISTER_FUNC_ADDR = 6; +constexpr auto SKX_MC0_CH2_REGISTER_FUNC_ADDR = 2; +constexpr auto SKX_MC0_CH3_REGISTER_FUNC_ADDR = -1; //Does not exist + +constexpr auto SKX_MC1_CH0_REGISTER_DEV_ADDR = 12; +constexpr auto SKX_MC1_CH1_REGISTER_DEV_ADDR = 12; +constexpr auto SKX_MC1_CH2_REGISTER_DEV_ADDR = 13; +constexpr auto SKX_MC1_CH3_REGISTER_DEV_ADDR = -1; //Does not exist +constexpr auto SKX_MC1_CH0_REGISTER_FUNC_ADDR = 2; +constexpr auto 
SKX_MC1_CH1_REGISTER_FUNC_ADDR = 6; +constexpr auto SKX_MC1_CH2_REGISTER_FUNC_ADDR = 2; +constexpr auto SKX_MC1_CH3_REGISTER_FUNC_ADDR = -1; //Does not exist + +constexpr auto SERVER_UBOX0_REGISTER_DEV_ADDR = 0; +constexpr auto SERVER_UBOX0_REGISTER_FUNC_ADDR = 1; + +constexpr auto KNL_MC1_CH0_REGISTER_DEV_ADDR = 9; +constexpr auto KNL_MC1_CH1_REGISTER_DEV_ADDR = 9; +constexpr auto KNL_MC1_CH2_REGISTER_DEV_ADDR = 9; +constexpr auto KNL_MC1_CH0_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_MC1_CH1_REGISTER_FUNC_ADDR = 3; +constexpr auto KNL_MC1_CH2_REGISTER_FUNC_ADDR = 4; + +constexpr auto KNL_EDC0_ECLK_REGISTER_DEV_ADDR = 24; +constexpr auto KNL_EDC0_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC1_ECLK_REGISTER_DEV_ADDR = 25; +constexpr auto KNL_EDC1_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC2_ECLK_REGISTER_DEV_ADDR = 26; +constexpr auto KNL_EDC2_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC3_ECLK_REGISTER_DEV_ADDR = 27; +constexpr auto KNL_EDC3_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC4_ECLK_REGISTER_DEV_ADDR = 28; +constexpr auto KNL_EDC4_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC5_ECLK_REGISTER_DEV_ADDR = 29; +constexpr auto KNL_EDC5_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC6_ECLK_REGISTER_DEV_ADDR = 30; +constexpr auto KNL_EDC6_ECLK_REGISTER_FUNC_ADDR = 2; +constexpr auto KNL_EDC7_ECLK_REGISTER_DEV_ADDR = 31; +constexpr auto KNL_EDC7_ECLK_REGISTER_FUNC_ADDR = 2; + +constexpr auto HSX_HA0_REGISTER_DEV_ADDR = 18; +constexpr auto HSX_HA0_REGISTER_FUNC_ADDR = 1; +constexpr auto HSX_HA1_REGISTER_DEV_ADDR = 18; +constexpr auto HSX_HA1_REGISTER_FUNC_ADDR = 5; + +constexpr auto XPF_HA_PCI_PMON_BOX_CTL_ADDR = 0xF4; +constexpr auto XPF_HA_PCI_PMON_CTL0_ADDR = 0xD8 + 4*0; +constexpr auto XPF_HA_PCI_PMON_CTL1_ADDR = 0xD8 + 4*1; +constexpr auto XPF_HA_PCI_PMON_CTL2_ADDR = 0xD8 + 4*2; +constexpr auto XPF_HA_PCI_PMON_CTL3_ADDR = 0xD8 + 4*3; +constexpr auto XPF_HA_PCI_PMON_CTR0_ADDR = 0xA0 + 8*0; +constexpr auto 
XPF_HA_PCI_PMON_CTR1_ADDR = 0xA0 + 8*1; +constexpr auto XPF_HA_PCI_PMON_CTR2_ADDR = 0xA0 + 8*2; +constexpr auto XPF_HA_PCI_PMON_CTR3_ADDR = 0xA0 + 8*3; constexpr auto BHS_PCIE_GEN5_PCI_PMON_BOX_CTL_ADDR = 0x620; constexpr auto BHS_PCIE_GEN5_PCI_PMON_CTL0_ADDR = 0x630; constexpr auto BHS_PCIE_GEN5_PCI_PMON_CTR0_ADDR = 0x650; @@ -623,123 +621,120 @@ constexpr auto BHS_PCIE_GEN5_PCI_PMON_CTR0_ADDR = 0x650; * XPF_ for Xeons: SNB, IVT, HSX, BDW, etc. * KNX_ for Xeon Phi (Knights *) processors */ -#define XPF_MC_CH_PCI_PMON_BOX_CTL_ADDR (0x0F4) -#define KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR (0xB30) -#define KNX_EDC_CH_PCI_PMON_BOX_CTL_ADDR (0xA30) +constexpr auto XPF_MC_CH_PCI_PMON_BOX_CTL_ADDR = 0x0F4; +constexpr auto KNX_MC_CH_PCI_PMON_BOX_CTL_ADDR = 0xB30; +constexpr auto KNX_EDC_CH_PCI_PMON_BOX_CTL_ADDR = 0xA30; //! for Xeons -#define XPF_MC_CH_PCI_PMON_FIXED_CTL_ADDR (0x0F0) -#define XPF_MC_CH_PCI_PMON_CTL3_ADDR (0x0E4) -#define XPF_MC_CH_PCI_PMON_CTL2_ADDR (0x0E0) -#define XPF_MC_CH_PCI_PMON_CTL1_ADDR (0x0DC) -#define XPF_MC_CH_PCI_PMON_CTL0_ADDR (0x0D8) +constexpr auto XPF_MC_CH_PCI_PMON_FIXED_CTL_ADDR = 0x0F0; +constexpr auto XPF_MC_CH_PCI_PMON_CTL3_ADDR = 0x0E4; +constexpr auto XPF_MC_CH_PCI_PMON_CTL2_ADDR = 0x0E0; +constexpr auto XPF_MC_CH_PCI_PMON_CTL1_ADDR = 0x0DC; +constexpr auto XPF_MC_CH_PCI_PMON_CTL0_ADDR = 0x0D8; //! KNL IMC -#define KNX_MC_CH_PCI_PMON_FIXED_CTL_ADDR (0xB44) -#define KNX_MC_CH_PCI_PMON_CTL3_ADDR (0xB2C) -#define KNX_MC_CH_PCI_PMON_CTL2_ADDR (0xB28) -#define KNX_MC_CH_PCI_PMON_CTL1_ADDR (0xB24) -#define KNX_MC_CH_PCI_PMON_CTL0_ADDR (0xB20) +constexpr auto KNX_MC_CH_PCI_PMON_FIXED_CTL_ADDR = 0xB44; +constexpr auto KNX_MC_CH_PCI_PMON_CTL3_ADDR = 0xB2C; +constexpr auto KNX_MC_CH_PCI_PMON_CTL2_ADDR = 0xB28; +constexpr auto KNX_MC_CH_PCI_PMON_CTL1_ADDR = 0xB24; +constexpr auto KNX_MC_CH_PCI_PMON_CTL0_ADDR = 0xB20; //! 
KNL EDC ECLK -#define KNX_EDC_CH_PCI_PMON_FIXED_CTL_ADDR (0xA44) -#define KNX_EDC_CH_PCI_PMON_CTL3_ADDR (0xA2C) -#define KNX_EDC_CH_PCI_PMON_CTL2_ADDR (0xA28) -#define KNX_EDC_CH_PCI_PMON_CTL1_ADDR (0xA24) -#define KNX_EDC_CH_PCI_PMON_CTL0_ADDR (0xA20) -#define KNX_EDC_ECLK_PMON_UNIT_CTL_REG (0xA30) +constexpr auto KNX_EDC_CH_PCI_PMON_FIXED_CTL_ADDR = 0xA44; +constexpr auto KNX_EDC_CH_PCI_PMON_CTL3_ADDR = 0xA2C; +constexpr auto KNX_EDC_CH_PCI_PMON_CTL2_ADDR = 0xA28; +constexpr auto KNX_EDC_CH_PCI_PMON_CTL1_ADDR = 0xA24; +constexpr auto KNX_EDC_CH_PCI_PMON_CTL0_ADDR = 0xA20; +constexpr auto KNX_EDC_ECLK_PMON_UNIT_CTL_REG = 0xA30; //! for Xeons -#define XPF_MC_CH_PCI_PMON_FIXED_CTR_ADDR (0x0D0) -#define XPF_MC_CH_PCI_PMON_CTR3_ADDR (0x0B8) -#define XPF_MC_CH_PCI_PMON_CTR2_ADDR (0x0B0) -#define XPF_MC_CH_PCI_PMON_CTR1_ADDR (0x0A8) -#define XPF_MC_CH_PCI_PMON_CTR0_ADDR (0x0A0) +constexpr auto XPF_MC_CH_PCI_PMON_FIXED_CTR_ADDR = 0x0D0; +constexpr auto XPF_MC_CH_PCI_PMON_CTR3_ADDR = 0x0B8; +constexpr auto XPF_MC_CH_PCI_PMON_CTR2_ADDR = 0x0B0; +constexpr auto XPF_MC_CH_PCI_PMON_CTR1_ADDR = 0x0A8; +constexpr auto XPF_MC_CH_PCI_PMON_CTR0_ADDR = 0x0A0; //! for KNL IMC -#define KNX_MC_CH_PCI_PMON_FIXED_CTR_ADDR (0xB3C) -#define KNX_MC_CH_PCI_PMON_CTR3_ADDR (0xB18) -#define KNX_MC_CH_PCI_PMON_CTR2_ADDR (0xB10) -#define KNX_MC_CH_PCI_PMON_CTR1_ADDR (0xB08) -#define KNX_MC_CH_PCI_PMON_CTR0_ADDR (0xB00) +constexpr auto KNX_MC_CH_PCI_PMON_FIXED_CTR_ADDR = 0xB3C; +constexpr auto KNX_MC_CH_PCI_PMON_CTR3_ADDR = 0xB18; +constexpr auto KNX_MC_CH_PCI_PMON_CTR2_ADDR = 0xB10; +constexpr auto KNX_MC_CH_PCI_PMON_CTR1_ADDR = 0xB08; +constexpr auto KNX_MC_CH_PCI_PMON_CTR0_ADDR = 0xB00; //! 
for KNL EDC ECLK -#define KNX_EDC_CH_PCI_PMON_FIXED_CTR_ADDR (0xA3C) -#define KNX_EDC_CH_PCI_PMON_CTR3_ADDR (0xA18) -#define KNX_EDC_CH_PCI_PMON_CTR2_ADDR (0xA10) -#define KNX_EDC_CH_PCI_PMON_CTR1_ADDR (0xA08) -#define KNX_EDC_CH_PCI_PMON_CTR0_ADDR (0xA00) - -#define SERVER_HBM_CH_PMON_BASE_ADDR (0x141c00) -#define SERVER_HBM_CH_PMON_STEP (0x4000) -#define SERVER_HBM_CH_PMON_SIZE (0x1000) -#define SERVER_HBM_BOX_PMON_STEP (0x9000) - -#define SERVER_MC_CH_PMON_BASE_ADDR (0x22800) -#define SERVER_MC_CH_PMON_STEP (0x4000) -#define SERVER_MC_CH_PMON_SIZE (0x1000) -#define SERVER_MC_CH_PMON_BOX_CTL_OFFSET (0x00) -#define SERVER_MC_CH_PMON_CTL0_OFFSET (0x40) -#define SERVER_MC_CH_PMON_CTL1_OFFSET (SERVER_MC_CH_PMON_CTL0_OFFSET + 4*1) -#define SERVER_MC_CH_PMON_CTL2_OFFSET (SERVER_MC_CH_PMON_CTL0_OFFSET + 4*2) -#define SERVER_MC_CH_PMON_CTL3_OFFSET (SERVER_MC_CH_PMON_CTL0_OFFSET + 4*3) -#define SERVER_MC_CH_PMON_CTR0_OFFSET (0x08) -#define SERVER_MC_CH_PMON_CTR1_OFFSET (SERVER_MC_CH_PMON_CTR0_OFFSET + 8*1) -#define SERVER_MC_CH_PMON_CTR2_OFFSET (SERVER_MC_CH_PMON_CTR0_OFFSET + 8*2) -#define SERVER_MC_CH_PMON_CTR3_OFFSET (SERVER_MC_CH_PMON_CTR0_OFFSET + 8*3) -#define SERVER_MC_CH_PMON_FIXED_CTL_OFFSET (0x54) -#define SERVER_MC_CH_PMON_FIXED_CTR_OFFSET (0x38) - +constexpr auto KNX_EDC_CH_PCI_PMON_FIXED_CTR_ADDR = 0xA3C; +constexpr auto KNX_EDC_CH_PCI_PMON_CTR3_ADDR = 0xA18; +constexpr auto KNX_EDC_CH_PCI_PMON_CTR2_ADDR = 0xA10; +constexpr auto KNX_EDC_CH_PCI_PMON_CTR1_ADDR = 0xA08; +constexpr auto KNX_EDC_CH_PCI_PMON_CTR0_ADDR = 0xA00; + +constexpr auto SERVER_HBM_CH_PMON_BASE_ADDR = 0x141c00; +constexpr auto SERVER_HBM_CH_PMON_STEP = 0x4000; +constexpr auto SERVER_HBM_CH_PMON_SIZE = 0x1000; +constexpr auto SERVER_HBM_BOX_PMON_STEP = 0x9000; + +constexpr auto SERVER_MC_CH_PMON_BASE_ADDR = 0x22800; +constexpr auto SERVER_MC_CH_PMON_STEP = 0x4000; +constexpr auto SERVER_MC_CH_PMON_SIZE = 0x1000; +constexpr auto SERVER_MC_CH_PMON_BOX_CTL_OFFSET = 0x00; +constexpr auto 
SERVER_MC_CH_PMON_CTL0_OFFSET = 0x40; +constexpr auto SERVER_MC_CH_PMON_CTL1_OFFSET = SERVER_MC_CH_PMON_CTL0_OFFSET + 4*1; +constexpr auto SERVER_MC_CH_PMON_CTL2_OFFSET = SERVER_MC_CH_PMON_CTL0_OFFSET + 4*2; +constexpr auto SERVER_MC_CH_PMON_CTL3_OFFSET = SERVER_MC_CH_PMON_CTL0_OFFSET + 4*3; +constexpr auto SERVER_MC_CH_PMON_CTR0_OFFSET = 0x08; +constexpr auto SERVER_MC_CH_PMON_CTR1_OFFSET = SERVER_MC_CH_PMON_CTR0_OFFSET + 8*1; +constexpr auto SERVER_MC_CH_PMON_CTR2_OFFSET = SERVER_MC_CH_PMON_CTR0_OFFSET + 8*2; +constexpr auto SERVER_MC_CH_PMON_CTR3_OFFSET = SERVER_MC_CH_PMON_CTR0_OFFSET + 8*3; +constexpr auto SERVER_MC_CH_PMON_FIXED_CTL_OFFSET = 0x54; +constexpr auto SERVER_MC_CH_PMON_FIXED_CTR_OFFSET = 0x38; constexpr auto BHS_MC_CH_PMON_BASE_ADDR = 0x024e800; -#define JKTIVT_QPI_PORT0_REGISTER_DEV_ADDR (8) -#define JKTIVT_QPI_PORT0_REGISTER_FUNC_ADDR (2) -#define JKTIVT_QPI_PORT1_REGISTER_DEV_ADDR (9) -#define JKTIVT_QPI_PORT1_REGISTER_FUNC_ADDR (2) -#define JKTIVT_QPI_PORT2_REGISTER_DEV_ADDR (24) -#define JKTIVT_QPI_PORT2_REGISTER_FUNC_ADDR (2) - -#define HSX_QPI_PORT0_REGISTER_DEV_ADDR (8) -#define HSX_QPI_PORT0_REGISTER_FUNC_ADDR (2) -#define HSX_QPI_PORT1_REGISTER_DEV_ADDR (9) -#define HSX_QPI_PORT1_REGISTER_FUNC_ADDR (2) -#define HSX_QPI_PORT2_REGISTER_DEV_ADDR (10) -#define HSX_QPI_PORT2_REGISTER_FUNC_ADDR (2) - -#define SKX_QPI_PORT0_REGISTER_DEV_ADDR (14) -#define SKX_QPI_PORT0_REGISTER_FUNC_ADDR (0) -#define SKX_QPI_PORT1_REGISTER_DEV_ADDR (15) -#define SKX_QPI_PORT1_REGISTER_FUNC_ADDR (0) -#define SKX_QPI_PORT2_REGISTER_DEV_ADDR (16) -#define SKX_QPI_PORT2_REGISTER_FUNC_ADDR (0) - -#define CPX_QPI_PORT3_REGISTER_DEV_ADDR (14) -#define CPX_QPI_PORT3_REGISTER_FUNC_ADDR (4) -#define CPX_QPI_PORT4_REGISTER_DEV_ADDR (15) -#define CPX_QPI_PORT4_REGISTER_FUNC_ADDR (4) -#define CPX_QPI_PORT5_REGISTER_DEV_ADDR (16) -#define CPX_QPI_PORT5_REGISTER_FUNC_ADDR (4) - -#define ICX_QPI_PORT0_REGISTER_DEV_ADDR (2) -#define ICX_QPI_PORT0_REGISTER_FUNC_ADDR (1) -#define 
ICX_QPI_PORT1_REGISTER_DEV_ADDR (3) -#define ICX_QPI_PORT1_REGISTER_FUNC_ADDR (1) -#define ICX_QPI_PORT2_REGISTER_DEV_ADDR (4) -#define ICX_QPI_PORT2_REGISTER_FUNC_ADDR (1) - -#define SPR_QPI_PORT0_REGISTER_DEV_ADDR (1) -#define SPR_QPI_PORT0_REGISTER_FUNC_ADDR (1) - -#define SPR_QPI_PORT1_REGISTER_DEV_ADDR (2) -#define SPR_QPI_PORT1_REGISTER_FUNC_ADDR (1) - -#define SPR_QPI_PORT2_REGISTER_DEV_ADDR (3) -#define SPR_QPI_PORT2_REGISTER_FUNC_ADDR (1) - -#define SPR_QPI_PORT3_REGISTER_DEV_ADDR (4) -#define SPR_QPI_PORT3_REGISTER_FUNC_ADDR (1) - - +constexpr auto JKTIVT_QPI_PORT0_REGISTER_DEV_ADDR = 8; +constexpr auto JKTIVT_QPI_PORT0_REGISTER_FUNC_ADDR = 2; +constexpr auto JKTIVT_QPI_PORT1_REGISTER_DEV_ADDR = 9; +constexpr auto JKTIVT_QPI_PORT1_REGISTER_FUNC_ADDR = 2; +constexpr auto JKTIVT_QPI_PORT2_REGISTER_DEV_ADDR = 24; +constexpr auto JKTIVT_QPI_PORT2_REGISTER_FUNC_ADDR = 2; + +constexpr auto HSX_QPI_PORT0_REGISTER_DEV_ADDR = 8; +constexpr auto HSX_QPI_PORT0_REGISTER_FUNC_ADDR = 2; +constexpr auto HSX_QPI_PORT1_REGISTER_DEV_ADDR = 9; +constexpr auto HSX_QPI_PORT1_REGISTER_FUNC_ADDR = 2; +constexpr auto HSX_QPI_PORT2_REGISTER_DEV_ADDR = 10; +constexpr auto HSX_QPI_PORT2_REGISTER_FUNC_ADDR = 2; + +constexpr auto SKX_QPI_PORT0_REGISTER_DEV_ADDR = 14; +constexpr auto SKX_QPI_PORT0_REGISTER_FUNC_ADDR = 0; +constexpr auto SKX_QPI_PORT1_REGISTER_DEV_ADDR = 15; +constexpr auto SKX_QPI_PORT1_REGISTER_FUNC_ADDR = 0; +constexpr auto SKX_QPI_PORT2_REGISTER_DEV_ADDR = 16; +constexpr auto SKX_QPI_PORT2_REGISTER_FUNC_ADDR = 0; + +constexpr auto CPX_QPI_PORT3_REGISTER_DEV_ADDR = 14; +constexpr auto CPX_QPI_PORT3_REGISTER_FUNC_ADDR = 4; +constexpr auto CPX_QPI_PORT4_REGISTER_DEV_ADDR = 15; +constexpr auto CPX_QPI_PORT4_REGISTER_FUNC_ADDR = 4; +constexpr auto CPX_QPI_PORT5_REGISTER_DEV_ADDR = 16; +constexpr auto CPX_QPI_PORT5_REGISTER_FUNC_ADDR = 4; + +constexpr auto ICX_QPI_PORT0_REGISTER_DEV_ADDR = 2; +constexpr auto ICX_QPI_PORT0_REGISTER_FUNC_ADDR = 1; +constexpr auto 
ICX_QPI_PORT1_REGISTER_DEV_ADDR = 3; +constexpr auto ICX_QPI_PORT1_REGISTER_FUNC_ADDR = 1; +constexpr auto ICX_QPI_PORT2_REGISTER_DEV_ADDR = 4; +constexpr auto ICX_QPI_PORT2_REGISTER_FUNC_ADDR = 1; + +constexpr auto SPR_QPI_PORT0_REGISTER_DEV_ADDR = 1; +constexpr auto SPR_QPI_PORT0_REGISTER_FUNC_ADDR = 1; + +constexpr auto SPR_QPI_PORT1_REGISTER_DEV_ADDR = 2; +constexpr auto SPR_QPI_PORT1_REGISTER_FUNC_ADDR = 1; + +constexpr auto SPR_QPI_PORT2_REGISTER_DEV_ADDR = 3; +constexpr auto SPR_QPI_PORT2_REGISTER_FUNC_ADDR = 1; + +constexpr auto SPR_QPI_PORT3_REGISTER_DEV_ADDR = 4; +constexpr auto SPR_QPI_PORT3_REGISTER_FUNC_ADDR = 1; constexpr auto BHS_QPI_PORT0_REGISTER_DEV_ADDR = 16; constexpr auto BHS_QPI_PORT0_REGISTER_FUNC_ADDR = 1; @@ -758,9 +753,9 @@ constexpr auto BHS_QPI_PORT4_REGISTER_FUNC_ADDR = 1; constexpr auto BHS_QPI_PORT5_REGISTER_DEV_ADDR = 21; constexpr auto BHS_QPI_PORT5_REGISTER_FUNC_ADDR = 1; -#define QPI_PORT0_MISC_REGISTER_FUNC_ADDR (0) -#define QPI_PORT1_MISC_REGISTER_FUNC_ADDR (0) -#define QPI_PORT2_MISC_REGISTER_FUNC_ADDR (0) +constexpr auto QPI_PORT0_MISC_REGISTER_FUNC_ADDR = 0; +constexpr auto QPI_PORT1_MISC_REGISTER_FUNC_ADDR = 0; +constexpr auto QPI_PORT2_MISC_REGISTER_FUNC_ADDR = 0; constexpr auto SKX_M3UPI_PORT0_REGISTER_DEV_ADDR = (0x12); constexpr auto SKX_M3UPI_PORT0_REGISTER_FUNC_ADDR = (1); @@ -798,19 +793,19 @@ constexpr auto SPR_M3UPI_PORT1_REGISTER_FUNC_ADDR = 1; constexpr auto SPR_M3UPI_PORT2_REGISTER_FUNC_ADDR = 1; constexpr auto SPR_M3UPI_PORT3_REGISTER_FUNC_ADDR = 1; -#define SKX_M2M_0_REGISTER_DEV_ADDR (8) -#define SKX_M2M_0_REGISTER_FUNC_ADDR (0) -#define SKX_M2M_1_REGISTER_DEV_ADDR (9) -#define SKX_M2M_1_REGISTER_FUNC_ADDR (0) +constexpr auto SKX_M2M_0_REGISTER_DEV_ADDR = 8; +constexpr auto SKX_M2M_0_REGISTER_FUNC_ADDR = 0; +constexpr auto SKX_M2M_1_REGISTER_DEV_ADDR = 9; +constexpr auto SKX_M2M_1_REGISTER_FUNC_ADDR = 0; -#define SERVER_M2M_0_REGISTER_DEV_ADDR (12) -#define SERVER_M2M_0_REGISTER_FUNC_ADDR (0) -#define 
SERVER_M2M_1_REGISTER_DEV_ADDR (13) -#define SERVER_M2M_1_REGISTER_FUNC_ADDR (0) -#define SERVER_M2M_2_REGISTER_DEV_ADDR (14) -#define SERVER_M2M_2_REGISTER_FUNC_ADDR (0) -#define SERVER_M2M_3_REGISTER_DEV_ADDR (15) -#define SERVER_M2M_3_REGISTER_FUNC_ADDR (0) +constexpr auto SERVER_M2M_0_REGISTER_DEV_ADDR = 12; +constexpr auto SERVER_M2M_0_REGISTER_FUNC_ADDR = 0; +constexpr auto SERVER_M2M_1_REGISTER_DEV_ADDR = 13; +constexpr auto SERVER_M2M_1_REGISTER_FUNC_ADDR = 0; +constexpr auto SERVER_M2M_2_REGISTER_DEV_ADDR = 14; +constexpr auto SERVER_M2M_2_REGISTER_FUNC_ADDR = 0; +constexpr auto SERVER_M2M_3_REGISTER_DEV_ADDR = 15; +constexpr auto SERVER_M2M_3_REGISTER_FUNC_ADDR = 0; constexpr auto SERVER_HBM_M2M_0_REGISTER_DEV_ADDR = 12; constexpr auto SERVER_HBM_M2M_0_REGISTER_FUNC_ADDR = 1; @@ -889,29 +884,29 @@ constexpr auto BHS_M3UPI_PORT3_REGISTER_FUNC_ADDR = 0; constexpr auto BHS_M3UPI_PORT4_REGISTER_FUNC_ADDR = 0; constexpr auto BHS_M3UPI_PORT5_REGISTER_FUNC_ADDR = 0; -#define SKX_M2M_PCI_PMON_BOX_CTL_ADDR (0x258) +constexpr auto SKX_M2M_PCI_PMON_BOX_CTL_ADDR = 0x258; -#define SKX_M2M_PCI_PMON_CTL0_ADDR (0x228) -#define SKX_M2M_PCI_PMON_CTL1_ADDR (0x230) -#define SKX_M2M_PCI_PMON_CTL2_ADDR (0x238) -#define SKX_M2M_PCI_PMON_CTL3_ADDR (0x240) +constexpr auto SKX_M2M_PCI_PMON_CTL0_ADDR = 0x228; +constexpr auto SKX_M2M_PCI_PMON_CTL1_ADDR = 0x230; +constexpr auto SKX_M2M_PCI_PMON_CTL2_ADDR = 0x238; +constexpr auto SKX_M2M_PCI_PMON_CTL3_ADDR = 0x240; -#define SKX_M2M_PCI_PMON_CTR0_ADDR (0x200) -#define SKX_M2M_PCI_PMON_CTR1_ADDR (0x208) -#define SKX_M2M_PCI_PMON_CTR2_ADDR (0x210) -#define SKX_M2M_PCI_PMON_CTR3_ADDR (0x218) +constexpr auto SKX_M2M_PCI_PMON_CTR0_ADDR = 0x200; +constexpr auto SKX_M2M_PCI_PMON_CTR1_ADDR = 0x208; +constexpr auto SKX_M2M_PCI_PMON_CTR2_ADDR = 0x210; +constexpr auto SKX_M2M_PCI_PMON_CTR3_ADDR = 0x218; -#define SERVER_M2M_PCI_PMON_BOX_CTL_ADDR (0x438) +constexpr auto SERVER_M2M_PCI_PMON_BOX_CTL_ADDR = 0x438; -#define 
SERVER_M2M_PCI_PMON_CTL0_ADDR (0x468) -#define SERVER_M2M_PCI_PMON_CTL1_ADDR (SERVER_M2M_PCI_PMON_CTL0_ADDR + 1*8) -#define SERVER_M2M_PCI_PMON_CTL2_ADDR (SERVER_M2M_PCI_PMON_CTL0_ADDR + 2*8) -#define SERVER_M2M_PCI_PMON_CTL3_ADDR (SERVER_M2M_PCI_PMON_CTL0_ADDR + 3*8) +constexpr auto SERVER_M2M_PCI_PMON_CTL0_ADDR = 0x468; +constexpr auto SERVER_M2M_PCI_PMON_CTL1_ADDR = SERVER_M2M_PCI_PMON_CTL0_ADDR + 1*8; +constexpr auto SERVER_M2M_PCI_PMON_CTL2_ADDR = SERVER_M2M_PCI_PMON_CTL0_ADDR + 2*8; +constexpr auto SERVER_M2M_PCI_PMON_CTL3_ADDR = SERVER_M2M_PCI_PMON_CTL0_ADDR + 3*8; -#define SERVER_M2M_PCI_PMON_CTR0_ADDR (0x440) -#define SERVER_M2M_PCI_PMON_CTR1_ADDR (SERVER_M2M_PCI_PMON_CTR0_ADDR + 1*8) -#define SERVER_M2M_PCI_PMON_CTR2_ADDR (SERVER_M2M_PCI_PMON_CTR0_ADDR + 2*8) -#define SERVER_M2M_PCI_PMON_CTR3_ADDR (SERVER_M2M_PCI_PMON_CTR0_ADDR + 3*8) +constexpr auto SERVER_M2M_PCI_PMON_CTR0_ADDR = 0x440; +constexpr auto SERVER_M2M_PCI_PMON_CTR1_ADDR = SERVER_M2M_PCI_PMON_CTR0_ADDR + 1*8; +constexpr auto SERVER_M2M_PCI_PMON_CTR2_ADDR = SERVER_M2M_PCI_PMON_CTR0_ADDR + 2*8; +constexpr auto SERVER_M2M_PCI_PMON_CTR3_ADDR = SERVER_M2M_PCI_PMON_CTR0_ADDR + 3*8; constexpr auto M3UPI_PCI_PMON_BOX_CTL_ADDR = (0xF4); @@ -953,57 +948,56 @@ constexpr auto IVT_MSR_UNCORE_PMON_GLOBAL_CTL = 0x0C00; constexpr auto SPR_MSR_UNCORE_PMON_GLOBAL_CTL = 0x2FF0; -#define PCM_INVALID_DEV_ADDR (~(uint32)0UL) -#define PCM_INVALID_FUNC_ADDR (~(uint32)0UL) - -#define Q_P_PCI_PMON_BOX_CTL_ADDR (0x0F4) +constexpr auto PCM_INVALID_DEV_ADDR = ~(uint32)0UL; +constexpr auto PCM_INVALID_FUNC_ADDR = ~(uint32)0UL; -#define Q_P_PCI_PMON_CTL3_ADDR (0x0E4) -#define Q_P_PCI_PMON_CTL2_ADDR (0x0E0) -#define Q_P_PCI_PMON_CTL1_ADDR (0x0DC) -#define Q_P_PCI_PMON_CTL0_ADDR (0x0D8) +constexpr auto Q_P_PCI_PMON_BOX_CTL_ADDR = 0x0F4; -#define Q_P_PCI_PMON_CTR3_ADDR (0x0B8) -#define Q_P_PCI_PMON_CTR2_ADDR (0x0B0) -#define Q_P_PCI_PMON_CTR1_ADDR (0x0A8) -#define Q_P_PCI_PMON_CTR0_ADDR (0x0A0) +constexpr auto 
Q_P_PCI_PMON_CTL3_ADDR = 0x0E4; +constexpr auto Q_P_PCI_PMON_CTL2_ADDR = 0x0E0; +constexpr auto Q_P_PCI_PMON_CTL1_ADDR = 0x0DC; +constexpr auto Q_P_PCI_PMON_CTL0_ADDR = 0x0D8; -#define QPI_RATE_STATUS_ADDR (0x0D4) +constexpr auto Q_P_PCI_PMON_CTR3_ADDR = 0x0B8; +constexpr auto Q_P_PCI_PMON_CTR2_ADDR = 0x0B0; +constexpr auto Q_P_PCI_PMON_CTR1_ADDR = 0x0A8; +constexpr auto Q_P_PCI_PMON_CTR0_ADDR = 0x0A0; -#define U_L_PCI_PMON_BOX_CTL_ADDR (0x378) +constexpr auto QPI_RATE_STATUS_ADDR = 0x0D4; -#define U_L_PCI_PMON_CTL3_ADDR (0x368) -#define U_L_PCI_PMON_CTL2_ADDR (0x360) -#define U_L_PCI_PMON_CTL1_ADDR (0x358) -#define U_L_PCI_PMON_CTL0_ADDR (0x350) +constexpr auto U_L_PCI_PMON_BOX_CTL_ADDR = 0x378; -#define U_L_PCI_PMON_CTR3_ADDR (0x330) -#define U_L_PCI_PMON_CTR2_ADDR (0x328) -#define U_L_PCI_PMON_CTR1_ADDR (0x320) -#define U_L_PCI_PMON_CTR0_ADDR (0x318) +constexpr auto U_L_PCI_PMON_CTL3_ADDR = 0x368; +constexpr auto U_L_PCI_PMON_CTL2_ADDR = 0x360; +constexpr auto U_L_PCI_PMON_CTL1_ADDR = 0x358; +constexpr auto U_L_PCI_PMON_CTL0_ADDR = 0x350; -#define ICX_UPI_PCI_PMON_BOX_CTL_ADDR (0x318) +constexpr auto U_L_PCI_PMON_CTR3_ADDR = 0x330; +constexpr auto U_L_PCI_PMON_CTR2_ADDR = 0x328; +constexpr auto U_L_PCI_PMON_CTR1_ADDR = 0x320; +constexpr auto U_L_PCI_PMON_CTR0_ADDR = 0x318; -#define ICX_UPI_PCI_PMON_CTL3_ADDR (0x368) -#define ICX_UPI_PCI_PMON_CTL2_ADDR (0x360) -#define ICX_UPI_PCI_PMON_CTL1_ADDR (0x358) -#define ICX_UPI_PCI_PMON_CTL0_ADDR (0x350) +constexpr auto ICX_UPI_PCI_PMON_BOX_CTL_ADDR = 0x318; -#define ICX_UPI_PCI_PMON_CTR3_ADDR (0x338) -#define ICX_UPI_PCI_PMON_CTR2_ADDR (0x330) -#define ICX_UPI_PCI_PMON_CTR1_ADDR (0x328) -#define ICX_UPI_PCI_PMON_CTR0_ADDR (0x320) +constexpr auto ICX_UPI_PCI_PMON_CTL3_ADDR = 0x368; +constexpr auto ICX_UPI_PCI_PMON_CTL2_ADDR = 0x360; +constexpr auto ICX_UPI_PCI_PMON_CTL1_ADDR = 0x358; +constexpr auto ICX_UPI_PCI_PMON_CTL0_ADDR = 0x350; +constexpr auto ICX_UPI_PCI_PMON_CTR3_ADDR = 0x338; +constexpr auto 
ICX_UPI_PCI_PMON_CTR2_ADDR = 0x330; +constexpr auto ICX_UPI_PCI_PMON_CTR1_ADDR = 0x328; +constexpr auto ICX_UPI_PCI_PMON_CTR0_ADDR = 0x320; constexpr auto SPR_UPI_PCI_PMON_BOX_CTL_ADDR = 0x318; constexpr auto SPR_UPI_PCI_PMON_CTL0_ADDR = 0x350; -constexpr auto SPR_UPI_PCI_PMON_CTR0_ADDR = 0x320; - -#define UCLK_FIXED_CTR_ADDR (0x704) -#define UCLK_FIXED_CTL_ADDR (0x703) -#define UBOX_MSR_PMON_CTL0_ADDR (0x705) -#define UBOX_MSR_PMON_CTL1_ADDR (0x706) -#define UBOX_MSR_PMON_CTR0_ADDR (0x709) -#define UBOX_MSR_PMON_CTR1_ADDR (0x70a) +constexpr auto SPR_UPI_PCI_PMON_CTR0_ADDR = 0x320; + +constexpr auto UCLK_FIXED_CTR_ADDR = 0x704; +constexpr auto UCLK_FIXED_CTL_ADDR = 0x703; +constexpr auto UBOX_MSR_PMON_CTL0_ADDR = 0x705; +constexpr auto UBOX_MSR_PMON_CTL1_ADDR = 0x706; +constexpr auto UBOX_MSR_PMON_CTR0_ADDR = 0x709; +constexpr auto UBOX_MSR_PMON_CTR1_ADDR = 0x70a; constexpr auto SPR_UCLK_FIXED_CTR_ADDR = 0x2FDF; constexpr auto SPR_UCLK_FIXED_CTL_ADDR = 0x2FDE; From fbcea48f2a6da096441ec72b8402a505dc82a62d Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 01:57:39 -0700 Subject: [PATCH 13/30] Add opCode event file for SRF --- src/opCode-175.txt | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/opCode-175.txt diff --git a/src/opCode-175.txt b/src/opCode-175.txt new file mode 100644 index 00000000..c3ccfbc9 --- /dev/null +++ b/src/opCode-175.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 
+ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 
+ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total From fe547f37ca22e10a078cc572f80baa6b632dee90 Mon Sep 17 00:00:00 2001 From: Alexander Antonov 
Date: Fri, 17 May 2024 01:58:32 -0700 Subject: [PATCH 14/30] Add skeleton for BirchStreamPlatform class --- src/pcm-iio.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 00544c23..7375b9e7 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -1364,6 +1364,75 @@ void IPlatformMapping::probeDeviceRange(std::vector &pci_devs, int d } } +class BirchStreamPlatform: public IPlatformMapping { +private: + bool isPcieStack(int unit); + bool isRootHcStack(int unit); + bool isPartHcStack(int unit); + bool isUboxStack(int unit); + + bool stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool getRootBuses(std::map> &root_buses); +public: + BirchStreamPlatform(int cpu_model, uint32_t sockets_count) : IPlatformMapping(cpu_model, sockets_count) {} + ~BirchStreamPlatform() = default; + bool pciTreeDiscover(std::vector& iios) override; +}; + +bool BirchStreamPlatform::isPcieStack(int unit) +{ + return false; +} + +bool BirchStreamPlatform::isRootHcStack(int unit) +{ + return false; +} + +bool BirchStreamPlatform::isPartHcStack(int unit) +{ + return false; +} + +bool BirchStreamPlatform::isUboxStack(int unit) +{ + return false; +} + +bool BirchStreamPlatform::stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +{ + return true; +} + +bool BirchStreamPlatform::getRootBuses(std::map> &root_buses) +{ + return true; +} + +bool BirchStreamPlatform::pciTreeDiscover(std::vector& iios) +{ + std::map> root_buses; + if (!getRootBuses(root_buses)) + { + return false; + } + + for (auto iter = root_buses.cbegin(); iter != root_buses.cend(); ++iter) { + auto rbs_on_socket = iter->second; + struct iio_stacks_on_socket iio_on_socket; + iio_on_socket.socket_id = iter->first; + for (auto rb = rbs_on_socket.cbegin(); rb != rbs_on_socket.cend(); ++rb) { + if (!stackProbe(rb->first, rb->second, iio_on_socket)) { + return 
false; + } + } + std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); + iios.push_back(iio_on_socket); + } + + return true; +} + std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_model, uint32_t sockets_count) { switch (cpu_model) { @@ -1376,6 +1445,8 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_m case PCM::SPR: case PCM::EMR: return std::unique_ptr{new EagleStreamPlatformMapping(cpu_model, sockets_count)}; + case PCM::SRF: + return std::unique_ptr{new BirchStreamPlatform(cpu_model, sockets_count)}; default: return nullptr; } From 4a742b741da26441b24121b5d82ee28225edd24b Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 02:16:29 -0700 Subject: [PATCH 15/30] Add checking first valid BUS ID to get_cpu_bus() function It's possible to have not enabled first stack --- src/utils.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utils.cpp b/src/utils.cpp index 146f9b08..45528234 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1210,7 +1210,13 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, //std::cout << std::hex << "get_cpu_bus: busNo=0x" << busNo << std::dec << "\n"; } - cpuBusNo0 = cpuBusNo[0] & 0xff; + /* + * It's possible to have not enabled first stack that's why + * need to find the first valid bus to read CSR + */ + int firstValidBusId = 0; + while (!((cpuBusValid >> firstValidBusId) & 0x1)) firstValidBusId++; + int cpuBusNo0 = (cpuBusNo[(int)(firstValidBusId / 4)] >> ((firstValidBusId % 4) * 8)) & 0xff; PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); sad_cfg_handler.read32(SPR_SAD_REG_CTL_CFG_OFFSET, &sadControlCfg); From 82cd94e34f27e6762213bd51e03d071f79a8b16e Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 02:33:13 -0700 Subject: [PATCH 16/30] Run internal loop get_cpu_bus() 4 times instead of 8 --- src/utils.cpp | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 
deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 45528234..f777d597 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1176,10 +1176,6 @@ int load_events(const std::string &fn, std::map &ofm, bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, uint32 &cpuBusValid, std::vector &cpuBusNo, int &cpuPackageId) { - int cpuBusNo0 = 0x0; - uint32 sadControlCfg = 0x0; - uint32 busNo = 0x0; - //std::cout << "get_cpu_bus: d=" << std::hex << msmDomain << ",b=" << msmBus << ",d=" << msmDev << ",f=" << msmFunc << std::dec << " \n"; try { PciHandleType h(msmDomain, msmBus, msmDev, msmFunc); @@ -1190,24 +1186,19 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, return false; } - for (int i = 0; i < 8; ++i) + cpuBusNo.resize(8); + for (int i = 0; i < 4; ++i) { - busNo = 0x00; - if (i <= 3) - { - h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i*4, &busNo); - } - else - { - h.read32(SPR_MSM_REG_CPUBUSNO4_OFFSET + (i-4)*4, &busNo); - } - if (busNo == (std::numeric_limits::max)()) + h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i * 4, &cpuBusNo[i]); + + h.read32(SPR_MSM_REG_CPUBUSNO4_OFFSET + i * 4, &cpuBusNo[i + 4]); + + if (cpuBusNo[i] == (std::numeric_limits::max)() || + cpuBusNo[i + 4] == (std::numeric_limits::max)()) { - std::cerr << "Failed to read CPUBUSNO" << std::endl; + std::cerr << "Failed to read CPUBUSNO registers" << std::endl; return false; } - cpuBusNo.push_back(busNo); - //std::cout << std::hex << "get_cpu_bus: busNo=0x" << busNo << std::dec << "\n"; } /* @@ -1217,8 +1208,9 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, int firstValidBusId = 0; while (!((cpuBusValid >> firstValidBusId) & 0x1)) firstValidBusId++; int cpuBusNo0 = (cpuBusNo[(int)(firstValidBusId / 4)] >> ((firstValidBusId % 4) * 8)) & 0xff; - PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); + uint32 sadControlCfg = 0x0; + PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); 
sad_cfg_handler.read32(SPR_SAD_REG_CTL_CFG_OFFSET, &sadControlCfg); if (sadControlCfg == (std::numeric_limits::max)()) { From 41f20f5d89805ff53bfadf1560931d2b3d253fb3 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 02:35:33 -0700 Subject: [PATCH 17/30] Cosmetic changes in get_cpu_bus() function --- src/utils.cpp | 72 ++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index f777d597..3788a6ae 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1177,50 +1177,52 @@ int load_events(const std::string &fn, std::map &ofm, bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, uint32 &cpuBusValid, std::vector &cpuBusNo, int &cpuPackageId) { //std::cout << "get_cpu_bus: d=" << std::hex << msmDomain << ",b=" << msmBus << ",d=" << msmDev << ",f=" << msmFunc << std::dec << " \n"; - try { - PciHandleType h(msmDomain, msmBus, msmDev, msmFunc); + try + { + PciHandleType h(msmDomain, msmBus, msmDev, msmFunc); - h.read32(SPR_MSM_REG_CPUBUSNO_VALID_OFFSET, &cpuBusValid); - if (cpuBusValid == (std::numeric_limits::max)()) { - std::cerr << "Failed to read CPUBUSNO_VALID" << std::endl; - return false; - } + h.read32(SPR_MSM_REG_CPUBUSNO_VALID_OFFSET, &cpuBusValid); + if (cpuBusValid == (std::numeric_limits::max)()) { + std::cerr << "Failed to read CPUBUSNO_VALID" << std::endl; + return false; + } - cpuBusNo.resize(8); - for (int i = 0; i < 4; ++i) - { - h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i * 4, &cpuBusNo[i]); + cpuBusNo.resize(8); + for (int i = 0; i < 4; ++i) + { + h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i * 4, &cpuBusNo[i]); - h.read32(SPR_MSM_REG_CPUBUSNO4_OFFSET + i * 4, &cpuBusNo[i + 4]); + h.read32(SPR_MSM_REG_CPUBUSNO4_OFFSET + i * 4, &cpuBusNo[i + 4]); - if (cpuBusNo[i] == (std::numeric_limits::max)() || - cpuBusNo[i + 4] == (std::numeric_limits::max)()) + if (cpuBusNo[i] == (std::numeric_limits::max)() || + cpuBusNo[i + 4] == 
(std::numeric_limits::max)()) + { + std::cerr << "Failed to read CPUBUSNO registers" << std::endl; + return false; + } + } + + /* + * It's possible to have not enabled first stack that's why + * need to find the first valid bus to read CSR + */ + int firstValidBusId = 0; + while (!((cpuBusValid >> firstValidBusId) & 0x1)) firstValidBusId++; + int cpuBusNo0 = (cpuBusNo[(int)(firstValidBusId / 4)] >> ((firstValidBusId % 4) * 8)) & 0xff; + + uint32 sadControlCfg = 0x0; + PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); + sad_cfg_handler.read32(SPR_SAD_REG_CTL_CFG_OFFSET, &sadControlCfg); + if (sadControlCfg == (std::numeric_limits::max)()) { - std::cerr << "Failed to read CPUBUSNO registers" << std::endl; + std::cerr << "Failed to read SAD_CONTROL_CFG" << std::endl; return false; } - } + cpuPackageId = sadControlCfg & 0xf; - /* - * It's possible to have not enabled first stack that's why - * need to find the first valid bus to read CSR - */ - int firstValidBusId = 0; - while (!((cpuBusValid >> firstValidBusId) & 0x1)) firstValidBusId++; - int cpuBusNo0 = (cpuBusNo[(int)(firstValidBusId / 4)] >> ((firstValidBusId % 4) * 8)) & 0xff; - - uint32 sadControlCfg = 0x0; - PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); - sad_cfg_handler.read32(SPR_SAD_REG_CTL_CFG_OFFSET, &sadControlCfg); - if (sadControlCfg == (std::numeric_limits::max)()) - { - std::cerr << "Failed to read SAD_CONTROL_CFG" << std::endl; - return false; + return true; } - cpuPackageId = sadControlCfg & 0xf; - - return true; - } catch (...) + catch (...) 
{ std::cerr << "Warning: unable to enumerate CPU Buses" << std::endl; return false; From 3fc11dabeb73de2b7b861266d2a50daa3e8d3902 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 07:18:19 -0700 Subject: [PATCH 18/30] Add function to struct pci to check whether it's Intel device --- src/lspci.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lspci.h b/src/lspci.h index 7d42b74b..0cf09134 100644 --- a/src/lspci.h +++ b/src/lspci.h @@ -282,6 +282,8 @@ struct pci { {} bool hasChildDevices() const { return (child_pci_devs.size() != 0); } + + bool isIntelDevice() const { return (vendor_id == PCM_INTEL_PCI_VENDOR_ID); } }; struct iio_skx { struct { From 6bb546f7e9b7e47212e74049c09ad787e27c5679 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 07:30:23 -0700 Subject: [PATCH 19/30] Cosmetic changes in get_cpu_bus() --- src/utils.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 3788a6ae..ac546406 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1188,15 +1188,13 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, } cpuBusNo.resize(8); - for (int i = 0; i < 4; ++i) - { + for (int i = 0; i < 4; ++i) { h.read32(SPR_MSM_REG_CPUBUSNO0_OFFSET + i * 4, &cpuBusNo[i]); h.read32(SPR_MSM_REG_CPUBUSNO4_OFFSET + i * 4, &cpuBusNo[i + 4]); if (cpuBusNo[i] == (std::numeric_limits::max)() || - cpuBusNo[i + 4] == (std::numeric_limits::max)()) - { + cpuBusNo[i + 4] == (std::numeric_limits::max)()) { std::cerr << "Failed to read CPUBUSNO registers" << std::endl; return false; } @@ -1213,8 +1211,7 @@ bool get_cpu_bus(uint32 msmDomain, uint32 msmBus, uint32 msmDev, uint32 msmFunc, uint32 sadControlCfg = 0x0; PciHandleType sad_cfg_handler(msmDomain, cpuBusNo0, 0, 0); sad_cfg_handler.read32(SPR_SAD_REG_CTL_CFG_OFFSET, &sadControlCfg); - if (sadControlCfg == (std::numeric_limits::max)()) - { + if (sadControlCfg == (std::numeric_limits::max)()) { 
std::cerr << "Failed to read SAD_CONTROL_CFG" << std::endl; return false; } From f3291d6c445cc8004ec8577201bc90df31e6fdca Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 07:33:50 -0700 Subject: [PATCH 20/30] Enable pcm-iio for SRF --- src/cpucounters.h | 1 + src/pcm-iio.cpp | 266 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 261 insertions(+), 6 deletions(-) diff --git a/src/cpucounters.h b/src/cpucounters.h index 560d9cac..d2578d60 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2509,6 +2509,7 @@ class PCM_API PCM || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::SPR || cpu_model == PCM::EMR + || cpu_model == PCM::SRF ); } diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 7375b9e7..968429ad 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -353,6 +353,103 @@ const std::map> es_sad_to_pmu_id_mapping = { {estype::esEmrXcc, emr_sad_to_pmu_id_mapping }, }; +#define SRF_PE0_PMON_ID 3 +#define SRF_PE1_PMON_ID 4 +#define SRF_PE2_PMON_ID 2 +#define SRF_PE3_PMON_ID 5 +/* + * There are platform configurations where FlexUPI stacks (stacks 5 and 6) are enabled as + * PCIe stacks and PCIe ports are disabled (ports 2 and 3) and vice versa. See details here: + * In these cases the PMON IDs are different. + * So, defines with _FLEX_ are applicable for cases when FlexUPI stacks + * are working as PCIe ports. 
+ */ +#define SRF_PE4_PMON_ID 11 +#define SRF_FLEX_PE4_PMON_ID 13 +#define SRF_PE5_PMON_ID 12 +#define SRF_FLEX_PE5_PMON_ID 10 + +#define SRF_PE6_PMON_ID 0 +#define SRF_PE7_PMON_ID 7 +#define SRF_PE8_PMON_ID 8 +#define SRF_HC0_PMON_ID 1 +#define SRF_HC1_PMON_ID 6 +#define SRF_HC2_PMON_ID 14 +#define SRF_HC3_PMON_ID 9 + +#define SRF_PE0_SAD_BUS_ID 2 +#define SRF_PE1_SAD_BUS_ID 3 +#define SRF_PE2_SAD_BUS_ID 1 +#define SRF_PE3_SAD_BUS_ID 4 +#define SRF_PE4_SAD_BUS_ID 29 +#define SRF_FLEX_PE4_SAD_BUS_ID SRF_PE4_SAD_BUS_ID +#define SRF_PE5_SAD_BUS_ID 26 +#define SRF_FLEX_PE5_SAD_BUS_ID SRF_PE5_SAD_BUS_ID +#define SRF_PE6_SAD_BUS_ID 0 // UPI0 +#define SRF_PE7_SAD_BUS_ID 5 // UPI1 +#define SRF_PE8_SAD_BUS_ID 28 // UPI2 +#define SRF_UBOXA_SAD_BUS_ID 30 +#define SRF_UBOXB_SAD_BUS_ID 31 + +const std::set srf_pcie_stacks({ + SRF_PE0_SAD_BUS_ID, + SRF_PE1_SAD_BUS_ID, + SRF_PE2_SAD_BUS_ID, + SRF_PE3_SAD_BUS_ID, + SRF_PE4_SAD_BUS_ID, + SRF_FLEX_PE4_SAD_BUS_ID, + SRF_PE5_SAD_BUS_ID, + SRF_FLEX_PE5_SAD_BUS_ID, + SRF_PE6_SAD_BUS_ID, + SRF_PE7_SAD_BUS_ID, + SRF_PE8_SAD_BUS_ID, +}); + +#define SRF_HC0_SAD_BUS_ID 8 +#define SRF_HC1_SAD_BUS_ID 12 +#define SRF_HC2_SAD_BUS_ID 20 +#define SRF_HC3_SAD_BUS_ID 16 + +const std::map srf_sad_to_pmu_id_mapping = { + { SRF_PE0_SAD_BUS_ID, SRF_PE0_PMON_ID }, + { SRF_PE1_SAD_BUS_ID, SRF_PE1_PMON_ID }, + { SRF_PE2_SAD_BUS_ID, SRF_PE2_PMON_ID }, + { SRF_PE3_SAD_BUS_ID, SRF_PE3_PMON_ID }, + { SRF_PE4_SAD_BUS_ID, SRF_PE4_PMON_ID }, + { SRF_FLEX_PE4_SAD_BUS_ID, SRF_FLEX_PE4_PMON_ID }, + { SRF_PE5_SAD_BUS_ID, SRF_PE5_PMON_ID }, + { SRF_FLEX_PE5_SAD_BUS_ID, SRF_FLEX_PE5_PMON_ID }, + { SRF_PE6_SAD_BUS_ID, SRF_PE6_PMON_ID }, + { SRF_PE7_SAD_BUS_ID, SRF_PE7_PMON_ID }, + { SRF_PE8_SAD_BUS_ID, SRF_PE8_PMON_ID }, + { SRF_HC0_SAD_BUS_ID, SRF_HC0_PMON_ID }, + { SRF_HC1_SAD_BUS_ID, SRF_HC1_PMON_ID }, + { SRF_HC2_SAD_BUS_ID, SRF_HC2_PMON_ID }, + { SRF_HC3_SAD_BUS_ID, SRF_HC3_PMON_ID }, +}; + +#define SRF_DSA_IAX_PART_NUMBER 0 +#define SRF_HQM_PART_NUMBER 5 +#define 
SRF_QAT_PART_NUMBER 4 + +static const std::string srf_iio_stack_names[] = { + "IIO Stack 0 - PCIe6 ", // SRF_PE6_PMON_ID 0 + "IIO Stack 1 - HCx0 ", // SRF_HC0_PMON_ID 1 + "IIO Stack 2 - PCIe2 ", // SRF_PE2_PMON_ID 2 + "IIO Stack 3 - PCIe0 ", // SRF_PE0_PMON_ID 3 + "IIO Stack 4 - PCIe1 ", // SRF_PE1_PMON_ID 4 + "IIO Stack 5 - PCIe3 ", // SRF_PE3_PMON_ID 5 + "IIO Stack 6 - HCx1 ", // SRF_HC1_PMON_ID 6 + "IIO Stack 7 - PCIe7 ", // SRF_PE7_PMON_ID 7 + "IIO Stack 8 - PCIe8 ", // SRF_PE8_PMON_ID 8 + "IIO Stack 9 - HCx3 ", // SRF_HC3_PMON_ID 9 + "IIO Stack 10 - Flex PCIe5", // SRF_FLEX_PE5_PMON_ID 10 + "IIO Stack 11 - PCIe4 ", // SRF_PE4_PMON_ID 11 + "IIO Stack 12 - PCIe5 ", // SRF_PE5_PMON_ID 12 + "IIO Stack 13 - Flex PCIe4", // SRF_FLEX_PE4_PMON_ID 13 + "IIO Stack 14 - HCx2 ", // SRF_HC2_PMON_ID 14 +}; + struct iio_counter : public counter { std::vector data; }; @@ -1371,6 +1468,9 @@ class BirchStreamPlatform: public IPlatformMapping { bool isPartHcStack(int unit); bool isUboxStack(int unit); + bool birchStreamPciStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool birchStreamAcceleratorStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); + bool stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket); bool getRootBuses(std::map> &root_buses); public: @@ -1379,34 +1479,187 @@ class BirchStreamPlatform: public IPlatformMapping { bool pciTreeDiscover(std::vector& iios) override; }; +bool BirchStreamPlatform::birchStreamPciStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +{ + /* + * All stacks manage PCIe 5.0 Root Ports. Bifurcated Root Ports A-H appear as devices 2-9. 
+ */ + struct iio_stack stack; + stack.domain = address.domainno; + stack.busno = address.busno; + stack.iio_unit_id = srf_sad_to_pmu_id_mapping.at(unit); + stack.stack_name = srf_iio_stack_names[stack.iio_unit_id]; + for (int slot = 2; slot < 9; ++slot) + { + struct pci root_pci_dev; + root_pci_dev.bdf = bdf(address.domainno, address.busno, slot, 0x0); + if (probe_pci(&root_pci_dev)) + { + struct iio_bifurcated_part part; + part.part_id = slot - 2; + part.root_pci_dev = root_pci_dev; + for (uint8_t b = root_pci_dev.secondary_bus_number; b <= root_pci_dev.subordinate_bus_number; ++b) { + for (uint8_t d = 0; d < 32; ++d) { + for (uint8_t f = 0; f < 8; ++f) { + struct pci child_pci_dev(address.domainno, b, d, f); + if (probe_pci(&child_pci_dev)) { + child_pci_dev.parts_no.push_back(part.part_id); + part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(part); + } + } + iio_on_socket.stacks.push_back(stack); + return true; +} + +bool BirchStreamPlatform::birchStreamAcceleratorStackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) +{ + struct iio_stack stack; + stack.iio_unit_id = srf_sad_to_pmu_id_mapping.at(unit); + stack.domain = address.domainno; + stack.busno = address.busno; + stack.stack_name = srf_iio_stack_names[stack.iio_unit_id]; + + /* + * Instance of DSA(0, 1, 2, 3) appears as PCIe device with SAD Bus ID (8, 12, 20, 16), device 1, function 0 + * Instance of IAX(0, 1, 2, 3) appears as PCIe device with SAD Bus ID (8, 12, 20, 16), device 2, function 0 + * Instance of QAT(0, 1, 2, 3) appears as PCIe device with SAD Bus ID (9, 13, 21, 17), device 0, function 0 + * Instance of HQM(0, 1, 2, 3) appears as PCIe device with SAD Bus ID (10, 14, 22, 18), device 0, function 0 + */ + auto process_pci_dev = [](int domainno, int busno, int devno, int part_number, iio_bifurcated_part& part) + { + struct pci pci_dev(domainno, busno, devno, 0); + if (probe_pci(&pci_dev) && pci_dev.isIntelDevice()) { + 
part.part_id = part_number; + pci_dev.parts_no.push_back(part_number); + part.child_pci_devs.push_back(pci_dev); + return true; + } + return false; + }; + + { + struct iio_bifurcated_part part; + if (process_pci_dev(address.domainno, address.busno, 1, SRF_DSA_IAX_PART_NUMBER, part) || + process_pci_dev(address.domainno, address.busno, 2, SRF_DSA_IAX_PART_NUMBER, part)) { + stack.parts.push_back(part); + } + } + + { + struct iio_bifurcated_part part; + if (process_pci_dev(address.domainno, address.busno + 1, 0, SRF_QAT_PART_NUMBER, part)) { + stack.parts.push_back(part); + } + } + + { + /* Bus number for HQM is higher on 3 than DSA bus number */ + struct iio_bifurcated_part part; + if (process_pci_dev(address.domainno, address.busno + 3, 0, SRF_HQM_PART_NUMBER, part)) { + stack.parts.push_back(part); + } + } + + if (!stack.parts.empty()) { + iio_on_socket.stacks.push_back(stack); + } + + return true; +} + bool BirchStreamPlatform::isPcieStack(int unit) { - return false; + return srf_pcie_stacks.find(unit) != srf_pcie_stacks.end(); } +/* + * HC is the name of DINO stacks as we had on SPR + */ bool BirchStreamPlatform::isRootHcStack(int unit) { - return false; + return SRF_HC0_SAD_BUS_ID == unit || SRF_HC1_SAD_BUS_ID == unit || + SRF_HC2_SAD_BUS_ID == unit || SRF_HC3_SAD_BUS_ID == unit; } bool BirchStreamPlatform::isPartHcStack(int unit) { - return false; + return isRootHcStack(unit - 1) || isRootHcStack(unit - 2); } bool BirchStreamPlatform::isUboxStack(int unit) { - return false; + return SRF_UBOXA_SAD_BUS_ID == unit || SRF_UBOXB_SAD_BUS_ID == unit; } bool BirchStreamPlatform::stackProbe(int unit, const struct bdf &address, struct iio_stacks_on_socket &iio_on_socket) { - return true; + if (isPcieStack(unit)) { + return birchStreamPciStackProbe(unit, address, iio_on_socket); + } + else if (isRootHcStack(unit)) { + return birchStreamAcceleratorStackProbe(unit, address, iio_on_socket); + } + else if (isPartHcStack(unit)) { + cout << "Found a part of HC stack. 
Stack ID - " << unit << " domain " << address.domainno + << " bus " << std::hex << std::setfill('0') << std::setw(2) << (int)address.busno << std::dec << ". Don't probe it again." << endl; + return true; + } + else if (isUboxStack(unit)) { + cout << "Found UBOX stack. Stack ID - " << unit << " domain " << address.domainno + << " bus " << std::hex << std::setfill('0') << std::setw(2) << (int)address.busno << std::dec << endl; + return true; + } + + cout << "Unknown stack ID " << unit << " domain " << address.domainno << " bus " << std::hex << std::setfill('0') << std::setw(2) << (int)address.busno << std::dec << endl; + + return false; } bool BirchStreamPlatform::getRootBuses(std::map> &root_buses) { - return true; + bool mapped = true; + for (uint32_t domain = 0; mapped; domain++) { + mapped = false; + for (uint16_t b = 0; b < 256; b++) { + for (uint8_t d = 0; d < 32; d++) { + for (uint8_t f = 0; f < 8; f++) { + struct pci pci_dev(domain, b, d, f); + if (!probe_pci(&pci_dev)) { + break; + } + if (!((pci_dev.vendor_id == PCM_INTEL_PCI_VENDOR_ID) && (pci_dev.device_id == SPR_MSM_DEV_ID))) { + continue; + } + + std::uint32_t cpuBusValid; + std::vector cpuBusNo; + int package_id; + + if (get_cpu_bus(domain, b, d, f, cpuBusValid, cpuBusNo, package_id) == false) { + return false; + } + + for (int cpuBusId = 0; cpuBusId < SPR_MSM_CPUBUSNO_MAX; ++cpuBusId) { + if (!((cpuBusValid >> cpuBusId) & 0x1)) { + cout << "CPU bus " << cpuBusId << " is disabled on package " << package_id << endl; + continue; + } + int rootBus = (cpuBusNo[(int)(cpuBusId / 4)] >> ((cpuBusId % 4) * 8)) & 0xff; + root_buses[package_id][cpuBusId] = bdf(domain, rootBus, 0, 0); + cout << "Mapped CPU bus #" << cpuBusId << " (domain " << domain << " bus " << std::hex << rootBus << std::dec << ")" + << " package " << package_id << endl; + mapped = true; + } + } + } + } + } + return !root_buses.empty(); } bool BirchStreamPlatform::pciTreeDiscover(std::vector& iios) @@ -1462,6 +1715,7 @@ ccr* get_ccr(PCM* m, 
uint64_t& ccr) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::SRF: return new icx_ccr(ccr); default: cerr << m->getCPUFamilyModelString() << " is not supported! Program aborted" << endl; From 56bf77d0e27fd2278598663ab0aa0b55f73260b9 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Fri, 17 May 2024 07:35:04 -0700 Subject: [PATCH 21/30] Cosmetic changes in pcm-iio.cpp --- src/pcm-iio.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 968429ad..66ca983c 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -5,13 +5,15 @@ // Aaron Cruz // and others #include "cpucounters.h" + #ifdef _MSC_VER -#pragma warning(disable : 4996) // for sprintf -#include -#include "windows/windriver.h" + #pragma warning(disable : 4996) // for sprintf + #include + #include "windows/windriver.h" #else -#include + #include #endif + #include #include #include @@ -20,8 +22,10 @@ #include #include #include +#include + #ifdef _MSC_VER -#include "freegetopt/getopt.h" + #include "freegetopt/getopt.h" #endif #include "lspci.h" @@ -465,7 +469,6 @@ typedef struct vector combine_stack_name_and_counter_names(string stack_name, const map>> &nameMap) { - vector v; vector tmp(nameMap.size()); v.push_back(stack_name); @@ -521,7 +524,6 @@ string build_pci_header(const PCIDB & pciDB, uint32_t column_width, const struct s.insert(0, std::string(4*level, ' ')); } - return s; } From cbeba329fc84d31438bc0f2564b8e4d691860dc7 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Sun, 19 May 2024 07:19:22 -0700 Subject: [PATCH 22/30] Fix incorrect PMON IDs for HCx stacks --- src/pcm-iio.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 66ca983c..0ab606b5 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -378,8 +378,8 @@ const std::map> es_sad_to_pmu_id_mapping = { #define SRF_PE8_PMON_ID 8 #define SRF_HC0_PMON_ID 1 #define SRF_HC1_PMON_ID 6 -#define 
SRF_HC2_PMON_ID 14 -#define SRF_HC3_PMON_ID 9 +#define SRF_HC2_PMON_ID 9 +#define SRF_HC3_PMON_ID 14 #define SRF_PE0_SAD_BUS_ID 2 #define SRF_PE1_SAD_BUS_ID 3 From 4e6265370a7535f87a390d1f0204bd31ff55d157 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Sun, 19 May 2024 15:56:58 +0200 Subject: [PATCH 23/30] implement Intel PMT TelemetryArray on Linux --- src/CMakeLists.txt | 2 +- src/cpucounters.h | 1 + src/pmt.cpp | 158 +++++++++++++++++++++++++++++++++++++++++++++ src/pmt.h | 37 +++++++++++ src/tpmi.cpp | 16 ----- src/utils.cpp | 21 ++++++ src/utils.h | 3 + 7 files changed, 221 insertions(+), 17 deletions(-) create mode 100644 src/pmt.cpp create mode 100644 src/pmt.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fbce37c4..0af9ad1c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ # All pcm-* executables set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) -file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp tpmi.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) +file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp tpmi.cpp pmt.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) if (APPLE) file(GLOB UNUX_SOURCES dashboard.cpp) diff --git a/src/cpucounters.h b/src/cpucounters.h index d2578d60..ac61f0e8 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -28,6 +28,7 @@ #include "msr.h" #include "pci.h" #include "tpmi.h" +#include "pmt.h" #include "bw.h" #include "width_extender.h" #include "exceptions/unsupported_processor_exception.hpp" diff --git a/src/pmt.cpp b/src/pmt.cpp new file mode 100644 index 00000000..23773ad1 --- /dev/null +++ b/src/pmt.cpp @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2024, Intel Corporation + +#include "pmt.h" 
+#include "utils.h" +#include +#include +#include +#include + +namespace pcm { + +#ifdef __linux__ +class TelemetryArrayLinux : public TelemetryArrayInterface +{ + TelemetryArrayLinux() = delete; + typedef std::vector FileVector; + typedef std::unordered_map FileMap; + static std::shared_ptr TelemetryFiles; + static FileMap & getTelemetryFiles() + { + if (!TelemetryFiles.get()) + { + std::shared_ptr TelemetryFilesTemp = std::make_shared(); + auto paths = findPathsFromPattern("/sys/class/intel_pmt/telem*"); + for (auto & path : paths) + { + const auto guid = read_number(readSysFS((path + "/guid").c_str()).c_str()); + #if 0 + auto size = read_number(readSysFS((path + "/size").c_str()).c_str()); + std::cout << "path: " << path << " guid: 0x" << std::hex << guid << " size: "<< std::dec << size << std::endl; + #endif + auto file = std::fopen((path + "/telem").c_str(), "rb"); + if (!file) + { + std::cerr << "Error: failed to open " << path << "/telem" << std::endl; + continue; + } + TelemetryFilesTemp->operator[](guid).push_back(file); + } + + // print the telemetry files + for (auto & guid : *TelemetryFilesTemp) + { + auto & files = guid.second; + for (auto & file : files) + { + if (!file) + { + std::cerr << "Error: file is null" << std::endl; + continue; + } + // std::cout << "guid: 0x" << std::hex << guid.first << " file: " << file << std::endl; + } + } + + TelemetryFiles = TelemetryFilesTemp; + } + return *TelemetryFiles; + } + std::vector data; + size_t uid, instance; +public: + TelemetryArrayLinux(const size_t uid_, const size_t instance_): uid(uid_), instance(instance_) + { + assert(instance < numInstances(uid)); + load(); + } + static size_t numInstances(const size_t uid) + { + return getTelemetryFiles().at(uid).size(); + } + virtual ~TelemetryArrayLinux() override + { + } + size_t size() override + { + return data.size(); + } + void load() override + { + FILE * file = getTelemetryFiles().at(uid).at(instance); + assert(file); + // get the file size + fseek(file, 
0, SEEK_END); + size_t fileSize = ftell(file); + fseek(file, 0, SEEK_SET); + data.resize(fileSize); + const size_t bytesRead = fread(data.data(), 1, fileSize, file); + if (bytesRead != fileSize) + { + std::cerr << "Error: failed to read " << fileSize << " bytes from telemetry file" << std::endl; + } + } + uint64 get(size_t qWordOffset, size_t lsb, size_t msb) override + { + assert(qWordOffset * sizeof(uint64) + sizeof(uint64) <= data.size()); + return extract_bits(*reinterpret_cast(&data[qWordOffset * sizeof(uint64)]), lsb, msb); + } +}; + +std::shared_ptr TelemetryArrayLinux::TelemetryFiles; + +#else + +class TelemetryArrayDummy : public TelemetryArrayInterface +{ + TelemetryArrayDummy() = delete; +public: + TelemetryArrayDummy(const size_t /* uid */, const size_t /* instance */) {}; + static size_t numInstances(const size_t /* uid */) { return 0; }; + virtual ~TelemetryArrayDummy() override {}; + size_t size() override { return 0;}; // in bytes + void load() override {}; + uint64 get(size_t , size_t , size_t ) override { return 0;} ; +}; + +#endif + +TelemetryArray::TelemetryArray(const size_t uid, const size_t instance) +{ +#ifdef __linux__ + impl = std::make_shared(uid, instance); +#else + impl = std::make_shared(uid, instance); +#endif +} + +size_t TelemetryArray::numInstances(const size_t uid) +{ +#ifdef __linux__ + return TelemetryArrayLinux::numInstances(uid); +#else + return TelemetryArrayDummy::numInstances(uid); +#endif +} + +TelemetryArray::~TelemetryArray() {} + +size_t TelemetryArray::size() +{ + assert(impl.get()); + return impl->size(); +} + +void TelemetryArray::load() +{ + assert(impl.get()); + impl->load(); +} + +uint64 TelemetryArray::get(size_t qWordOffset, size_t lsb, size_t msb) +{ + assert(impl.get()); + return impl->get(qWordOffset, lsb, msb); +} + +}; // namespace pcm \ No newline at end of file diff --git a/src/pmt.h b/src/pmt.h new file mode 100644 index 00000000..d3ccc8ab --- /dev/null +++ b/src/pmt.h @@ -0,0 +1,37 @@ +// 
SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2024, Intel Corporation + +#pragma once + +#include "types.h" +#include + +namespace pcm { + +class TelemetryArrayInterface +{ +public: + virtual size_t size() = 0; // in bytes + virtual size_t numQWords() + { + return size() / sizeof(uint64); + } + virtual void load() = 0; + virtual uint64 get(size_t qWordOffset, size_t lsb, size_t msb) = 0; + virtual ~TelemetryArrayInterface() {}; +}; + +class TelemetryArray : public TelemetryArrayInterface +{ + TelemetryArray() = delete; + std::shared_ptr impl; +public: + TelemetryArray(const size_t /* uid */, const size_t /* instance */); + static size_t numInstances(const size_t /* uid */); + virtual ~TelemetryArray() override; + size_t size() override; // in bytes + void load() override; + uint64 get(size_t qWordOffset, size_t lsb, size_t msb) override; +}; + +} // namespace pcm \ No newline at end of file diff --git a/src/tpmi.cpp b/src/tpmi.cpp index 5b2cfee9..9056b9e0 100644 --- a/src/tpmi.cpp +++ b/src/tpmi.cpp @@ -10,7 +10,6 @@ #include #include #ifdef __linux__ -#include #include #endif @@ -331,21 +330,6 @@ bool TPMIHandleDriver::isAvailable() { if (available < 0) // not initialized yet { - auto findPathsFromPattern = [](const char* pattern) - { - std::vector result; - glob_t glob_result; - memset(&glob_result, 0, sizeof(glob_result)); - if (glob(pattern, GLOB_TILDE, nullptr, &glob_result) == 0) - { - for (size_t i = 0; i < glob_result.gl_pathc; ++i) - { - result.push_back(glob_result.gl_pathv[i]); - } - } - globfree(&glob_result); - return result; - }; instancePaths = findPathsFromPattern("/sys/kernel/debug/tpmi-*"); std::sort(instancePaths.begin(), instancePaths.end()); for (size_t i = 0; i < instancePaths.size(); ++i) diff --git a/src/utils.cpp b/src/utils.cpp index ac546406..8dbcc497 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -23,6 +23,9 @@ #ifndef _MSC_VER #include #endif +#ifdef __linux__ +#include +#endif namespace pcm { @@ -101,6 +104,24 @@ void 
print_cpu_details() std::cerr << "\n"; } +#ifdef __linux__ +std::vector findPathsFromPattern(const char* pattern) +{ + std::vector result; + glob_t glob_result; + memset(&glob_result, 0, sizeof(glob_result)); + if (glob(pattern, GLOB_TILDE, nullptr, &glob_result) == 0) + { + for (size_t i = 0; i < glob_result.gl_pathc; ++i) + { + result.push_back(glob_result.gl_pathv[i]); + } + } + globfree(&glob_result); + return result; +}; +#endif + #ifdef _MSC_VER ThreadGroupTempAffinity::ThreadGroupTempAffinity(uint32 core_id, bool checkStatus, const bool restore_) diff --git a/src/utils.h b/src/utils.h index 1e7837e9..50c3ca9b 100644 --- a/src/utils.h +++ b/src/utils.h @@ -612,6 +612,9 @@ inline void extractBitsPrintHelper(const std::pair & bits, T & valu void restrictDriverAccessNative(LPCTSTR path); #endif +#ifdef __linux__ +std::vector findPathsFromPattern(const char* pattern); +#endif class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows { From 7402052e5eb07f7621bd54cb305fc7213c17ca93 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Sun, 19 May 2024 18:50:13 +0200 Subject: [PATCH 24/30] pcm-raw: add pmt raw events --- src/cpucounters.cpp | 63 +++++++++++++++++++++++++++++++++++++++++++++ src/cpucounters.h | 47 ++++++++++++++++++++++++++++++++- src/pcm-raw.cpp | 20 ++++++++++++++ src/pmt.cpp | 11 +++++++- src/utils.cpp | 1 + src/utils.h | 3 +++ 6 files changed, 143 insertions(+), 2 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 608f8038..58aba98d 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -5713,6 +5713,7 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile packageMSRConfig = RawPMUConfig{}; pcicfgConfig = RawPMUConfig{}; mmioConfig = RawPMUConfig{}; + pmtConfig = RawPMUConfig{}; RawPMUConfigs curPMUConfigs = curPMUConfigs_; constexpr auto globalRegPos = 0ULL; PCM::ExtendedCustomCoreEventDescription conf; @@ -5982,6 +5983,29 @@ PCM::ErrorCode PCM::program(const 
RawPMUConfigs& curPMUConfigs_, const bool sile addLocations(mmioConfig.programmable); addLocations(mmioConfig.fixed); } + else if (type == "pmt") + { + pmtConfig = pmuConfig.second; + auto addLocations = [this](const std::vector& configs) { + for (const auto& c : configs) + { + if (PMTRegisterLocations.find(c.first) == PMTRegisterLocations.end()) + { + // add locations + std::vector locations; + const auto UID = c.first[PMTEventPosition::UID]; + for (size_t inst = 0; inst < TelemetryArray::numInstances(UID); ++inst) + { + locations.push_back(std::make_shared(UID, inst)); + // std::cout << "PMTRegisterLocations: UID: 0x" << std::hex << UID << " inst: " << std::dec << inst << std::endl; + } + PMTRegisterLocations[c.first] = locations; + } + } + }; + addLocations(pmtConfig.programmable); + addLocations(pmtConfig.fixed); + } else if (type == "cxlcm") { programCXLCM(events64); @@ -6466,6 +6490,44 @@ void PCM::readMMIORegisters(SystemCounterState& systemState) } } +void PCM::readPMTRegisters(SystemCounterState& systemState) +{ + for (auto & p: PMTRegisterLocations) + { + for (auto & t: p.second) + { + if (t.get()) + { + t->load(); + } + } + } + auto read = [this, &systemState](const RawEventConfig& cfg) { + const RawEventEncoding& reEnc = cfg.first; + systemState.PMTValues[reEnc].clear(); + const auto lsb = reEnc[PMTEventPosition::lsb]; + const auto msb = reEnc[PMTEventPosition::msb]; + const auto offset = reEnc[PMTEventPosition::offset]; + // std::cout << "PMTValues: " << std::hex << reEnc[PMTEventPosition::UID] << std::dec << std::endl; + for (auto& reg : PMTRegisterLocations[reEnc]) + { + if (reg.get()) + { + systemState.PMTValues[reEnc].push_back(reg->get(offset, lsb, msb)); + // std::cout << "PMTValues: " << std::hex << reEnc[PMTEventPosition::UID] << " " << std::dec << reg->get(offset, lsb, msb) << std::endl; + } + } + }; + for (const auto& cfg : pmtConfig.programmable) + { + read(cfg); + } + for (const auto& cfg : pmtConfig.fixed) + { + read(cfg); + } +} + void 
PCM::readQPICounters(SystemCounterState & result) { // read QPI counters @@ -6671,6 +6733,7 @@ void PCM::getAllCounterStates(SystemCounterState & systemState, std::vector{}(e[PMTEventPosition::UID]); + } + }; + struct PMTRegisterEncodingHash2 + { + std::size_t operator()(const RawEventEncoding & e) const + { + std::size_t h1 = std::hash{}(e[PMTEventPosition::UID]); + std::size_t h2 = std::hash{}(e[PMTEventPosition::offset]); + std::size_t h3 = std::hash{}(e[PMTEventPosition::lsb]); + return h1 ^ (h2 << 1ULL) ^ (h3 << 2ULL); + } + }; + struct PMTRegisterEncodingCmp + { + bool operator ()(const RawEventEncoding& a, const RawEventEncoding& b) const + { + return a[PMTEventPosition::UID] == b[PMTEventPosition::UID]; + } + }; + typedef std::shared_ptr PMTRegisterEncoding; // TelemetryArray shared ptr private: std::unordered_map, PCICFGRegisterEncodingHash, PCICFGRegisterEncodingCmp> PCICFGRegisterLocations{}; std::unordered_map, MMIORegisterEncodingHash, MMIORegisterEncodingCmp> MMIORegisterLocations{}; + std::unordered_map, PMTRegisterEncodingHash, PMTRegisterEncodingCmp> PMTRegisterLocations{}; public: TopologyEntry::CoreType getCoreType(const unsigned coreID) const @@ -1861,7 +1899,7 @@ class PCM_API PCM } return false; } - RawPMUConfig threadMSRConfig{}, packageMSRConfig{}, pcicfgConfig{}, mmioConfig{}; + RawPMUConfig threadMSRConfig{}, packageMSRConfig{}, pcicfgConfig{}, mmioConfig{}, pmtConfig{}; public: //! 
\brief Reads CPU family @@ -3743,6 +3781,7 @@ class SystemCounterState : public SocketCounterState friend class PCM; friend std::vector getPCICFGEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); friend std::vector getMMIOEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); + friend std::vector getPMTEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); std::vector > incomingQPIPackets; // each 64 byte std::vector > outgoingQPIFlits; // idle or data/non-data flits depending on the architecture @@ -3750,6 +3789,7 @@ class SystemCounterState : public SocketCounterState uint64 uncoreTSC; std::unordered_map , PCM::PCICFGRegisterEncodingHash, PCM::PCICFGRegisterEncodingCmp> PCICFGValues{}; std::unordered_map, PCM::MMIORegisterEncodingHash, PCM::MMIORegisterEncodingCmp> MMIOValues{}; + std::unordered_map, PCM::PMTRegisterEncodingHash2> PMTValues{}; protected: void readAndAggregate(std::shared_ptr handle) @@ -5100,6 +5140,11 @@ inline std::vector getMMIOEvent(const PCM::RawEventEncoding& eventEnc, c return getRegisterEvent(eventEnc, before.MMIOValues, after.MMIOValues); } +inline std::vector getPMTEvent(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after) +{ + return getRegisterEvent(eventEnc, before.PMTValues, after.PMTValues); +} + template uint64 getMSREvent(const uint64& index, const PCM::MSRType& type, const CounterStateType& before, const CounterStateType& after) { diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index b25461d3..04623e24 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1250,6 +1250,18 @@ std::string getMMIOEventString(const PCM::RawEventEncoding& eventEnc, const std: return c.str(); } +std::string getPMTEventString(const PCM::RawEventEncoding& eventEnc, const std::string& type) +{ + std::stringstream c; + c << type 
<< ":0x" << std::hex << + eventEnc[PCM::PMTEventPosition::UID] << + ":0x" << eventEnc[PCM::PMTEventPosition::offset] << + ":0x" << eventEnc[PCM::PMTEventPosition::lsb] << + ":0x" << eventEnc[PCM::PMTEventPosition::msb] << + ":" << getTypeString(eventEnc[PCM::PMTEventPosition::type]); + return c.str(); +} + typedef std::string(*getEventStringFunc)(const PCM::RawEventEncoding& eventEnc, const std::string& type); typedef std::vector(getEventFunc)(const PCM::RawEventEncoding& eventEnc, const SystemCounterState& before, const SystemCounterState& after); @@ -1639,6 +1651,10 @@ void printTransposed(const PCM::RawPMUConfigs& curPMUConfigs, { printRegisterRows(getMMIOEventString, getMMIOEvent); } + else if (type == "pmt") + { + printRegisterRows(getPMTEventString, getPMTEvent); + } else if (type == "m3upi") { choose(outputType, @@ -2055,6 +2071,10 @@ void print(const PCM::RawPMUConfigs& curPMUConfigs, { printRegisters(getMMIOEventString, getMMIOEvent); } + else if (type == "pmt") + { + printRegisters(getPMTEventString, getPMTEvent); + } else if (type == "ubox") { for (uint32 s = 0; s < m->getNumSockets(); ++s) diff --git a/src/pmt.cpp b/src/pmt.cpp index 23773ad1..644974c4 100644 --- a/src/pmt.cpp +++ b/src/pmt.cpp @@ -8,6 +8,10 @@ #include #include +#ifdef __linux__ +#include +#endif + namespace pcm { #ifdef __linux__ @@ -68,7 +72,12 @@ class TelemetryArrayLinux : public TelemetryArrayInterface } static size_t numInstances(const size_t uid) { - return getTelemetryFiles().at(uid).size(); + auto t = getTelemetryFiles(); + if (t.find(uid) == t.end()) + { + return 0; + } + return t.at(uid).size(); } virtual ~TelemetryArrayLinux() override { diff --git a/src/utils.cpp b/src/utils.cpp index 8dbcc497..42dc1cd6 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -946,6 +946,7 @@ bool isRegisterEvent(const std::string & pmu) { if (pmu == "mmio" || pmu == "pcicfg" + || pmu == "pmt" || pmu == "package_msr" || pmu == "thread_msr") { diff --git a/src/utils.h b/src/utils.h index 
50c3ca9b..0ff2327c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -36,6 +36,9 @@ #include #include +#ifdef __linux__ +#include +#endif namespace pcm { std::string safe_getenv(const char* env); From 505550aaf4f8277b7f444cf7cfefdcaa3425df68 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Sun, 19 May 2024 20:46:26 +0200 Subject: [PATCH 25/30] pcm-raw: improve format print for registers --- src/pcm-raw.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/pcm-raw.cpp b/src/pcm-raw.cpp index 04623e24..d83f7672 100644 --- a/src/pcm-raw.cpp +++ b/src/pcm-raw.cpp @@ -1225,14 +1225,16 @@ const char * getTypeString(uint64 typeID) std::string getMSREventString(const uint64 & index, const std::string & type, const PCM::MSRType & msrType) { std::stringstream c; - c << type << ":0x" << std::hex << index << ":" << getTypeString(msrType); + c << type << "/MSR 0x" << std::hex << index << "/" << getTypeString(msrType); return c.str(); } std::string getPCICFGEventString(const PCM::RawEventEncoding & eventEnc, const std::string& type) { std::stringstream c; - c << type << ":0x" << std::hex << eventEnc[PCM::PCICFGEventPosition::deviceID] << ":0x" << eventEnc[PCM::PCICFGEventPosition::offset] << ":0x" << eventEnc[PCM::PCICFGEventPosition::width] << ":" + c << type << "/deviceID 0x" << std::hex << eventEnc[PCM::PCICFGEventPosition::deviceID] + << "/offset 0x" << eventEnc[PCM::PCICFGEventPosition::offset] + << "/width 0x" << eventEnc[PCM::PCICFGEventPosition::width] << "/" << getTypeString(eventEnc[PCM::PCICFGEventPosition::type]); return c.str(); } @@ -1240,25 +1242,25 @@ std::string getPCICFGEventString(const PCM::RawEventEncoding & eventEnc, const s std::string getMMIOEventString(const PCM::RawEventEncoding& eventEnc, const std::string& type) { std::stringstream c; - c << type << ":0x" << std::hex << + c << type << "/deviceID 0x" << std::hex << eventEnc[PCM::MMIOEventPosition::deviceID] << - ":0x" << 
eventEnc[PCM::MMIOEventPosition::offset] << - ":0x" << eventEnc[PCM::MMIOEventPosition::membar_bits1] << - ":0x" << eventEnc[PCM::MMIOEventPosition::membar_bits2] << - ":0x" << eventEnc[PCM::MMIOEventPosition::width] << - ":" << getTypeString(eventEnc[PCM::MMIOEventPosition::type]); + "/offset 0x" << eventEnc[PCM::MMIOEventPosition::offset] << + "/membar_bits1 0x" << eventEnc[PCM::MMIOEventPosition::membar_bits1] << + "/membar_bits2 0x" << eventEnc[PCM::MMIOEventPosition::membar_bits2] << + "/width 0x" << eventEnc[PCM::MMIOEventPosition::width] << + "/" << getTypeString(eventEnc[PCM::MMIOEventPosition::type]); return c.str(); } std::string getPMTEventString(const PCM::RawEventEncoding& eventEnc, const std::string& type) { std::stringstream c; - c << type << ":0x" << std::hex << + c << type << "/UID 0x" << std::hex << eventEnc[PCM::PMTEventPosition::UID] << - ":0x" << eventEnc[PCM::PMTEventPosition::offset] << - ":0x" << eventEnc[PCM::PMTEventPosition::lsb] << - ":0x" << eventEnc[PCM::PMTEventPosition::msb] << - ":" << getTypeString(eventEnc[PCM::PMTEventPosition::type]); + "/offset 0x" << eventEnc[PCM::PMTEventPosition::offset] << + "/lsb 0x" << eventEnc[PCM::PMTEventPosition::lsb] << + "/msb 0x" << eventEnc[PCM::PMTEventPosition::msb] << + "/" << getTypeString(eventEnc[PCM::PMTEventPosition::type]); return c.str(); } From 0b838b07cec20c0010587b5b39ab1b99f2226da4 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Mon, 20 May 2024 11:34:13 +0200 Subject: [PATCH 26/30] make the code more robust --- src/pmt.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pmt.cpp b/src/pmt.cpp index 644974c4..9d21a32c 100644 --- a/src/pmt.cpp +++ b/src/pmt.cpp @@ -68,7 +68,7 @@ class TelemetryArrayLinux : public TelemetryArrayInterface TelemetryArrayLinux(const size_t uid_, const size_t instance_): uid(uid_), instance(instance_) { assert(instance < numInstances(uid)); - load(); + TelemetryArrayLinux::load(); } static size_t numInstances(const 
size_t uid) { @@ -92,7 +92,13 @@ class TelemetryArrayLinux : public TelemetryArrayInterface assert(file); // get the file size fseek(file, 0, SEEK_END); - size_t fileSize = ftell(file); + const auto pos = ftell(file); + if (pos < 0) + { + std::cerr << "Error: failed to get file size" << std::endl; + return; + } + const size_t fileSize = pos; fseek(file, 0, SEEK_SET); data.resize(fileSize); const size_t bytesRead = fread(data.data(), 1, fileSize, file); From 4c610905afe869b3ae5c4d57c508e6bae15d5435 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 21 May 2024 13:54:40 +0200 Subject: [PATCH 27/30] pcm-raw: document raw pmt access Change-Id: I215ee0f5e06ead3164a0bb4ea06b0ba6bd745599 --- doc/PCM_RAW_README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/PCM_RAW_README.md b/doc/PCM_RAW_README.md index d01facd2..0637ac1a 100644 --- a/doc/PCM_RAW_README.md +++ b/doc/PCM_RAW_README.md @@ -193,3 +193,34 @@ Sample csv output (date,time,event_name,milliseconds_between_samples,TSC_cycles_ 2021-09-27,00:07:40.507,UNC_UPI_L1_POWER_CYCLES,1000,2102078418,0,0,1200328715,0,0,1200283803 ``` The unit can be logical core, memory channel, CHA, etc, depending on the event type. + +-------------------------------------------------------------------------------- +Low-level access to Intel PMT telemetry data +-------------------------------------------------------------------------------- + +pcm-raw can read raw telemetry data from Intel PMT (https://github.com/intel/Intel-PMT/). 
+ +Syntax for a PMT raw telemetry counter: + +``` +pcm-raw -e pmt/config=<uniqueid>,config1=<sampleID>,config2=<sampleType>,config3=<lsb>,config4=<msb>[,name=<name>] + +``` + +The fields are the values for the counter from the Intel PMT aggregator XML: + +* uniqueid : Intel PMT Telemetry unique identifier +* sampleID : sample ID of the counter +* sampleType counter encoding: + - 0 : Snapshot (last value reported in csv) + - non-zero : Counter (delta to last value reported in csv) +* lsb : lsb field +* msb : msb field + +Example: +``` +# for https://github.com/intel/Intel-PMT/blob/868049006ad2770a75e5fc7526fd0c4b22438e27/xml/SPR/OOBMSM/CORE/spr_aggregator.xml#L15428 +pmt/config=0x87b6fef1,config1=770,config2=0,config3=32,config4=63,name="Temperature_histogram_range_5_(50.5-57.5C)_counter_for_core_0" +``` + +Current limitations: this feature (PMT access) is currently only available on Linux (with Intel PMT Linux driver). From fb8f0846703e78e115059b8a23b4593097506f4f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 21 May 2024 13:56:35 +0200 Subject: [PATCH 28/30] minor doc fixes Change-Id: I990cb2b459c23013d2c6031d23c3bdc27d07d761 --- doc/PCM_RAW_README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/PCM_RAW_README.md b/doc/PCM_RAW_README.md index 0637ac1a..dfaca25d 100644 --- a/doc/PCM_RAW_README.md +++ b/doc/PCM_RAW_README.md @@ -203,7 +203,7 @@ pcm-raw can read raw telemetry data from Intel PMT (https://github.com/intel/Int Syntax for a PMT raw telemetry counter: ``` -pcm-raw -e pmt/config=<uniqueid>,config1=<sampleID>,config2=<sampleType>,config3=<lsb>,config4=<msb>[,name=<name>] +pmt/config=<uniqueid>,config1=<sampleID>,config2=<sampleType>,config3=<lsb>,config4=<msb>[,name=<name>] ``` From d1d2d062319ddbd02876b4a245ab24e20e7a54e2 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 21 May 2024 13:58:42 +0200 Subject: [PATCH 29/30] minor doc fixes Change-Id: I62cc169da7541b8b6c3aa71391ab2acb13f32a53 --- doc/PCM_RAW_README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/PCM_RAW_README.md b/doc/PCM_RAW_README.md index dfaca25d..162cd61c 100644 --- 
a/doc/PCM_RAW_README.md +++ b/doc/PCM_RAW_README.md @@ -217,9 +217,8 @@ The fields are the values for the counter from the Intel PMT aggregator XML: * lsb : lsb field * msb : msb field -Example: +Example for https://github.com/intel/Intel-PMT/blob/868049006ad2770a75e5fc7526fd0c4b22438e27/xml/SPR/OOBMSM/CORE/spr_aggregator.xml#L15428: ``` -# for https://github.com/intel/Intel-PMT/blob/868049006ad2770a75e5fc7526fd0c4b22438e27/xml/SPR/OOBMSM/CORE/spr_aggregator.xml#L15428 pmt/config=0x87b6fef1,config1=770,config2=0,config3=32,config4=63,name="Temperature_histogram_range_5_(50.5-57.5C)_counter_for_core_0" ``` From 140c0feeade8c8406747f6b580c9e3819b10adb1 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 22 May 2024 08:09:07 +0200 Subject: [PATCH 30/30] fix BSD and OSX builds Change-Id: I1b1442c65c4f9d1369289fc9474f62d948207344 --- src/types.h | 4 ++++ src/utils.h | 5 +---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/types.h b/src/types.h index 8e93ff12..09b24a06 100644 --- a/src/types.h +++ b/src/types.h @@ -303,7 +303,9 @@ constexpr auto IA32_PQR_ASSOC = 0xc8f; constexpr auto IA32_QM_EVTSEL = 0xc8d; constexpr auto IA32_QM_CTR = 0xc8e; +#ifndef KERNEL constexpr auto PCM_INVALID_QOS_MONITORING_DATA = (std::numeric_limits::max)(); +#endif /* \brief Event Select Register format @@ -1445,7 +1447,9 @@ struct ICX_IIOPMUCNTCTLRegister constexpr auto MSR_PACKAGE_THERM_STATUS = 0x01B1; constexpr auto MSR_IA32_THERM_STATUS = 0x019C; +#ifndef KERNEL constexpr auto PCM_INVALID_THERMAL_HEADROOM = (std::numeric_limits::min)(); +#endif constexpr auto MSR_IA32_BIOS_SIGN_ID = 0x8B; diff --git a/src/utils.h b/src/utils.h index 0ff2327c..c2b42543 100644 --- a/src/utils.h +++ b/src/utils.h @@ -27,6 +27,7 @@ #endif #ifndef _MSC_VER +#include #include #include #include @@ -36,10 +37,6 @@ #include #include -#ifdef __linux__ -#include -#endif - namespace pcm { std::string safe_getenv(const char* env); }