From ead7611c7f4770c571ba6dd2da21aa5682a2dfbf Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 24 Oct 2024 10:40:21 +0200 Subject: [PATCH 01/14] disable static address sanitizer library by default --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7d99535..3a9b98f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) option(PCM_NO_ASAN "Disable address sanitizer" OFF) option(PCM_FUZZ "Enable fuzzing" OFF) option(PCM_BUILD_EXECUTABLES "Build PCM utilities" ON) -option(PCM_NO_STATIC_LIBASAN OFF) +option(PCM_NO_STATIC_LIBASAN "Disable static address sanitizer library" ON) if(MSVC) option(PCM_NO_STATIC_MSVC_RUNTIME_LIBRARY "Disable using static runtime under MSVC" OFF) From 49a6186ba6bb9c22e55c44a0e24469b7f0559d46 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 4 Nov 2024 13:46:27 +0100 Subject: [PATCH 02/14] add Grand Ridge (GRR) support Change-Id: Ia1c185bd6d26bc13d3ead6b6c6dfa2d70e642843 Co-authored-by: Alexander Antonov Co-authored-by: Roman Dementiev --- .../GenuineIntel-6-B6-0.json | 145 +++++++++++++ src/cpucounters.cpp | 83 ++++++++ src/cpucounters.h | 16 ++ src/opCode-6-182.txt | 45 ++++ src/pcm-iio.cpp | 201 ++++++++++++++++++ src/pcm-memory.cpp | 4 + src/pcm-pcie.cpp | 2 + src/pcm-pcie.h | 130 +++++++++++ src/types.h | 22 +- 9 files changed, 647 insertions(+), 1 deletion(-) create mode 100644 src/PMURegisterDeclarations/GenuineIntel-6-B6-0.json create mode 100644 src/opCode-6-182.txt diff --git a/src/PMURegisterDeclarations/GenuineIntel-6-B6-0.json b/src/PMURegisterDeclarations/GenuineIntel-6-B6-0.json new file mode 100644 index 00000000..e3b4a8ed --- /dev/null +++ b/src/PMURegisterDeclarations/GenuineIntel-6-B6-0.json @@ -0,0 +1,145 @@ +{ + "core" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "User": {"Config": 0, 
"Position": 16, "Width": 1, "DefaultValue": 1}, + "OS": {"Config": 0, "Position": 17, "Width": 1, "DefaultValue": 1}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1}, + "PinControl": {"Config": 0, "Position": 19, "Width": 1, "DefaultValue": 0}, + "APICInt": {"Config": 0, "Position": 20, "Width": 1, "DefaultValue": 0}, + "Enable": {"Config": 0, "Position": 22, "Width": 1, "DefaultValue": 1}, + "Invert": {"Config": 0, "Position": 23, "Width": 1}, + "CounterMask": {"Config": 0, "Position": 24, "Width": 8}, + "MSRIndex": { + "0x1a6" : {"Config": 1, "Position": 0, "Width": 64}, + "0x1a7" : {"Config": 2, "Position": 0, "Width": 64}, + "0x3f6" : {"Config": 3, "Position": 0, "Width": 64}, + "0x3f7" : {"Config": 4, "Position": 0, "Width": 64} + } + }, + "fixed0" : { + "OS": {"Config": 0, "Position": 0, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 1, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 3, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed1" : { + "OS": {"Config": 0, "Position": 4, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 5, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 7, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, 
"Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + }, + "fixed2" : { + "OS": {"Config": 0, "Position": 8, "Width": 1, "DefaultValue": 1}, + "User": {"Config": 0, "Position": 9, "Width": 1, "DefaultValue": 1}, + "EnablePMI": {"Config": 0, "Position": 11, "Width": 1, "DefaultValue": 0}, + "EventCode": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "UMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "EdgeDetect": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "Invert": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"}, + "CounterMask": {"Config": 0, "Position": -1, "__comment": "position=-1 means field ignored"} + } + }, + "cha" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "TIDEnable": {"Config": 0, "Position": 16, "Width": 1, "DefaultValue": 0}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 26}, + "TID": {"Config": 1, "Position": 0, "Width": 10, "DefaultValue": 0} + } + }, + "imc" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "xpi" : { + "__comment" : "this is for UPI LL and QPI LL uncore PMUs", + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, 
"Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 24} + } + }, + "m2m" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0}, + "UMaskExt": {"Config": 0, "Position": 32, "Width": 8} + } + }, + "m3upi" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "mdf" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "irp" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 8, "DefaultValue": 0} + } + }, + "pcu" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex8" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "pciex16" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, 
"Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0} + } + }, + "iio" : { + "programmable" : { + "EventCode": {"Config": 0, "Position": 0, "Width": 8}, + "UMask": {"Config": 0, "Position": 8, "Width": 8}, + "EdgeDetect": {"Config": 0, "Position": 18, "Width": 1, "DefaultValue": 0}, + "Threshold": {"Config": 0, "Position": 24, "Width": 12, "DefaultValue": 0}, + "PortMask": {"Config": 0, "Position": 36, "Width": 12}, + "FCMask": {"Config": 0, "Position": 48, "Width": 3} + } + } +} diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 8c8e0b2f..67b68e52 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -752,6 +752,7 @@ void PCM::initCStateSupportTables() case SPR: case EMR: case GNR: + case GRR: case SRF: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x60D, 0, 0, 0, 0x3F9, 0, 0, 0, 0}) ); case HASWELL_ULT: @@ -809,6 +810,7 @@ void PCM::initCStateSupportTables() case SPR: case EMR: case GNR: + case GRR: case SRF: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: @@ -1666,6 +1668,7 @@ bool PCM::detectNominalFrequency() || cpu_family_model == EMR || cpu_family_model == GNR || cpu_family_model == SRF + || cpu_family_model == GRR ) ? 
(100000000ULL) : (133333333ULL); nominal_frequency = ((freq >> 8) & 255) * bus_freq; @@ -1963,6 +1966,7 @@ void PCM::initUncoreObjects() case SPR: case EMR: case GNR: + case GRR: case SRF: { bool failed = false; @@ -2170,6 +2174,28 @@ void PCM::initUncorePMUsDirect() ); } break; + case GRR: + uncorePMUs[s].resize(1); + { + std::vector > CounterControlRegs{ + std::make_shared(handle, GRR_UBOX_MSR_PMON_CTL0_ADDR), + std::make_shared(handle, GRR_UBOX_MSR_PMON_CTL1_ADDR) + }, + CounterValueRegs{ + std::make_shared(handle, GRR_UBOX_MSR_PMON_CTR0_ADDR), + std::make_shared(handle, GRR_UBOX_MSR_PMON_CTR1_ADDR) + }; + uncorePMUs[s][0][UBOX_PMU_ID].push_back( + std::make_shared( + std::make_shared(handle, GRR_UBOX_MSR_PMON_BOX_CTL_ADDR), + CounterControlRegs, + CounterValueRegs, + std::make_shared(handle, GRR_UCLK_FIXED_CTL_ADDR), + std::make_shared(handle, GRR_UCLK_FIXED_CTR_ADDR) + ) + ); + } + break; default: if (isServerCPU() && hasPCICFGUncore()) { @@ -2349,6 +2375,7 @@ void PCM::initUncorePMUsDirect() switch (cpu_family_model) { case GNR: + case GRR: case SRF: uncorePMUs[s].resize(1); if (safe_getenv("PCM_NO_PCIE_GEN5_DISCOVERY") == std::string("1")) @@ -2474,6 +2501,26 @@ void PCM::initUncorePMUsDirect() } } break; + case PCM::GRR: + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + auto & handle = MSR[socketRefCore[s]]; + for (int unit = 0; unit < GRR_M2IOSF_NUM; ++unit) + { + iioPMUs[s][unit] = UncorePMU( + std::make_shared(handle, GRR_M2IOSF_IIO_UNIT_CTL + GRR_M2IOSF_REG_STEP * unit), + std::make_shared(handle, GRR_M2IOSF_IIO_CTL0 + GRR_M2IOSF_REG_STEP * unit + 0), + std::make_shared(handle, GRR_M2IOSF_IIO_CTL0 + GRR_M2IOSF_REG_STEP * unit + 1), + std::make_shared(handle, GRR_M2IOSF_IIO_CTL0 + GRR_M2IOSF_REG_STEP * unit + 2), + std::make_shared(handle, GRR_M2IOSF_IIO_CTL0 + GRR_M2IOSF_REG_STEP * unit + 3), + std::make_shared(handle, GRR_M2IOSF_IIO_CTR0 + GRR_M2IOSF_REG_STEP * unit + 0), + std::make_shared(handle, GRR_M2IOSF_IIO_CTR0 + GRR_M2IOSF_REG_STEP * 
unit + 1), + std::make_shared(handle, GRR_M2IOSF_IIO_CTR0 + GRR_M2IOSF_REG_STEP * unit + 2), + std::make_shared(handle, GRR_M2IOSF_IIO_CTR0 + GRR_M2IOSF_REG_STEP * unit + 3) + ); + } + } + break; } //init the IDX accelerator auto createIDXPMU = [](const size_t addr, const size_t mapSize, const size_t numaNode, const size_t socketId) -> IDX_PMU @@ -2670,6 +2717,12 @@ void PCM::initUncorePMUsDirect() IRP_CTR_REG_OFFSET = BHS_IRP_CTR_REG_OFFSET; IRP_UNIT_CTL = BHS_IRP_UNIT_CTL; break; + case GRR: + irpStacks = GRR_M2IOSF_NUM; + IRP_CTL_REG_OFFSET = GRR_IRP_CTL_REG_OFFSET; + IRP_CTR_REG_OFFSET = GRR_IRP_CTR_REG_OFFSET; + IRP_UNIT_CTL = GRR_IRP_UNIT_CTL; + break; } irpPMUs.resize(num_sockets); if (IRP_UNIT_CTL) @@ -3279,6 +3332,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == SPR || model_ == EMR || model_ == GNR + || model_ == GRR || model_ == SRF ); } @@ -3573,6 +3627,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter L3CacheHitsAvailable = true; core_gen_counter_num_used = 4; break; + case GRR: case SRF: LLCArchEventInit(coreEventDesc); coreEventDesc[2].event_number = CMT_MEM_LOAD_RETIRED_L2_MISS_EVTNR; @@ -4918,6 +4973,8 @@ const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const return "Emerald Rapids-SP"; case GNR: return "Granite Rapids-SP"; + case GRR: + return "Grand Ridge"; case SRF: return "Sierra Forest"; } @@ -7656,6 +7713,11 @@ void ServerUncorePMUs::initRegisterLocations(const PCM * pcm) PCM_PCICFG_M2M_INIT(3, SERVER) } break; + case PCM::GRR: + { + // placeholder to init GRR PCICFG + } + break; default: std::cerr << "Error: Uncore PMU for processor with id 0x" << std::hex << cpu_family_model << std::dec << " is not supported.\n"; throw std::exception(); @@ -8029,6 +8091,9 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm) switch (cpu_family_model) { + case PCM::GRR: + initBHSiMCPMUs(2); + break; case PCM::GNR: case PCM::SRF: initBHSiMCPMUs(12); @@ -8818,6 +8883,7 
@@ void ServerUncorePMUs::programServerUncoreMemoryMetrics(const ServerUncoreMemory } break; case PCM::GNR: + case PCM::GRR: case PCM::SRF: if (metrics == PmemMemoryMode) { @@ -8913,6 +8979,7 @@ void ServerUncorePMUs::program() EDCCntConfig[EventPosition::WRITE] = MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT.WR break; case PCM::GNR: + case PCM::GRR: case PCM::SRF: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); // monitor reads on counter 0: CAS_COUNT_SCH0.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); // monitor writes on counter 1: CAS_COUNT_SCH0.WR @@ -9045,6 +9112,7 @@ uint64 ServerUncorePMUs::getImcReadsForChannels(uint32 beginChannel, uint32 endC switch (cpu_family_model) { case PCM::GNR: + case PCM::GRR: case PCM::SRF: result += getMCCounter(i, EventPosition::READ2); break; @@ -9062,6 +9130,7 @@ uint64 ServerUncorePMUs::getImcWrites() switch (cpu_family_model) { case PCM::GNR: + case PCM::GRR: case PCM::SRF: result += getMCCounter(i, EventPosition::WRITE2); break; @@ -9784,6 +9853,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const case SPR: case EMR: case GNR: + case GRR: case SRF: return SPR_CHA0_MSR_PMON_CTR0 + SPR_CHA_MSR_STEP * Cbo + Ctr; @@ -9815,6 +9885,7 @@ uint64 PCM::CX_MSR_PMON_BOX_FILTER(uint32 Cbo) const case SPR: case EMR: case GNR: + case GRR: case SRF: return SPR_CHA0_MSR_PMON_BOX_FILTER + SPR_CHA_MSR_STEP * Cbo; @@ -9859,6 +9930,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const case SPR: case EMR: case GNR: + case GRR: case SRF: return SPR_CHA0_MSR_PMON_CTL0 + SPR_CHA_MSR_STEP * Cbo + Ctl; @@ -9889,6 +9961,7 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const case SPR: case EMR: case GNR: + case GRR: case SRF: return SPR_CHA0_MSR_PMON_BOX_CTRL + SPR_CHA_MSR_STEP * Cbo; @@ -9964,6 +10037,7 @@ uint32 
PCM::getMaxNumOfCBoxesInternal() const uint64 val = 0; switch (cpu_family_model) { + case GRR: case GNR: case SRF: { @@ -10079,6 +10153,9 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) int stacks_count; switch (getCPUFamilyModel()) { + case PCM::GRR: + stacks_count = GRR_M2IOSF_NUM; + break; case PCM::GNR: case PCM::SRF: stacks_count = BHS_M2IOSF_NUM; @@ -10175,6 +10252,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_family_model) { case PCM::GNR: + case PCM::GRR: case PCM::SRF: case PCM::SPR: case PCM::EMR: @@ -10228,6 +10306,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc && EMR != cpu_family_model && GNR != cpu_family_model && SRF != cpu_family_model + && GRR != cpu_family_model ) { programCboOpcodeFilter(opCode, pmu, nc_, 0, loc, rem); @@ -10728,6 +10807,7 @@ void UncorePMU::freeze(const uint32 extra) case PCM::SPR: case PCM::EMR: case PCM::GNR: + case PCM::GRR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; break; @@ -10743,6 +10823,7 @@ void UncorePMU::unfreeze(const uint32 extra) case PCM::SPR: case PCM::EMR: case PCM::GNR: + case PCM::GRR: case PCM::SRF: *unitControl = 0; break; @@ -10763,6 +10844,7 @@ bool UncorePMU::initFreeze(const uint32 extra, const char* xPICheckMsg) case PCM::SPR: case PCM::EMR: case PCM::GNR: + case PCM::GRR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ; // freeze *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_CONTROL; // freeze and reset control registers @@ -10802,6 +10884,7 @@ void UncorePMU::resetUnfreeze(const uint32 extra) case PCM::SPR: case PCM::EMR: case PCM::GNR: + case PCM::GRR: case PCM::SRF: *unitControl = SPR_UNC_PMON_UNIT_CTL_FRZ + SPR_UNC_PMON_UNIT_CTL_RST_COUNTERS; // freeze and reset counter registers *unitControl = 0; // unfreeze diff --git a/src/cpucounters.h b/src/cpucounters.h index 3969cf53..a5d39422 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1250,6 +1250,7 @@ class 
PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: *ctrl = *curEvent; break; @@ -1902,6 +1903,7 @@ class PCM_API PCM GNR = PCM_CPU_FAMILY_MODEL(6, 173), SRF = PCM_CPU_FAMILY_MODEL(6, 175), GNR_D = PCM_CPU_FAMILY_MODEL(6, 174), + GRR = PCM_CPU_FAMILY_MODEL(6, 182), END_OF_MODEL_LIST = 0x0ffff }; @@ -2000,6 +2002,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: return (serverUncorePMUs.size() && serverUncorePMUs[0].get()) ? (serverUncorePMUs[0]->getNumQPIPorts()) : 0; } @@ -2027,6 +2030,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: case BDX: case KNL: @@ -2056,6 +2060,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: case BDX: case KNL: @@ -2088,6 +2093,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: case BDX: case KNL: @@ -2153,6 +2159,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: return 6; } @@ -2206,6 +2213,7 @@ class PCM_API PCM case SPR: case EMR: case GNR: + case GRR: case SRF: case KNL: return true; @@ -2473,6 +2481,7 @@ class PCM_API PCM || cpu_family_model == PCM::EMR || cpu_family_model == PCM::GNR || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR ); } @@ -2491,6 +2500,7 @@ class PCM_API PCM || cpu_family_model == PCM::EMR || cpu_family_model == PCM::GNR || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR ); } @@ -2597,6 +2607,7 @@ class PCM_API PCM || cpu_family_model == PCM::SNOWRIDGE || cpu_family_model == PCM::SPR || cpu_family_model == PCM::EMR + || cpu_family_model == PCM::GRR || cpu_family_model == PCM::SRF || cpu_family_model == PCM::GNR ); @@ -2702,6 +2713,7 @@ class PCM_API PCM || cpu_family_model == PCM::EMR || cpu_family_model == PCM::GNR || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR || cpu_family_model == PCM::BDX || cpu_family_model == PCM::KNL ); @@ -2748,6 +2760,7 @@ class PCM_API PCM || cpu_family_model == PCM::EMR || cpu_family_model == 
PCM::GNR || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR ); } @@ -3434,6 +3447,7 @@ double getDRAMConsumedJoules(const CounterStateType & before, const CounterState || PCM::ICX == cpu_family_model || PCM::GNR == cpu_family_model || PCM::SRF == cpu_family_model + || PCM::GRR == cpu_family_model || PCM::KNL == cpu_family_model ) { /* as described in sections 5.3.2 (DRAM_POWER_INFO) and 5.3.3 (DRAM_ENERGY_STATUS) of @@ -4279,6 +4293,7 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType || cpu_family_model == PCM::ELKHART_LAKE || cpu_family_model == PCM::JASPER_LAKE || cpu_family_model == PCM::SRF + || cpu_family_model == PCM::GRR || cpu_family_model == PCM::ADL || cpu_family_model == PCM::RPL || cpu_family_model == PCM::MTL @@ -4390,6 +4405,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; const auto cpu_family_model = pcm->getCPUFamilyModel(); if (cpu_family_model == PCM::SNOWRIDGE + || cpu_family_model == PCM::GRR || cpu_family_model == PCM::ELKHART_LAKE || cpu_family_model == PCM::JASPER_LAKE || cpu_family_model == PCM::SRF diff --git a/src/opCode-6-182.txt b/src/opCode-6-182.txt new file mode 100644 index 00000000..c3ccfbc9 --- /dev/null +++ b/src/opCode-6-182.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 
+ctr=0,ev_sel=0x83,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 +ctr=0,ev_sel=0x83,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 +ctr=1,ev_sel=0x83,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x4,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part6 
+ctr=3,ev_sel=0xc0,umask=0x4,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB read,vname=Part7 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part0 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part1 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part2 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part3 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part4 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part5 +ctr=2,ev_sel=0xc0,umask=0x1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part6 +ctr=3,ev_sel=0xc0,umask=0x1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=OB write,vname=Part7 +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x01,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=256T Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0xc0,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total diff --git a/src/pcm-iio.cpp b/src/pcm-iio.cpp index 07a01311..8c77f038 100644 --- a/src/pcm-iio.cpp +++ b/src/pcm-iio.cpp @@ -38,6 +38,8 @@ using namespace pcm; #define NIS_DID 0x18D1 #define 
HQM_DID 0x270B +#define GRR_QAT_VRP_DID 0x5789 // Virtual Root Port to integrated QuickAssist (GRR QAT) +#define GRR_NIS_VRP_DID 0x5788 // VRP to Network Interface and Scheduler (GRR NIS) #define ROOT_BUSES_OFFSET 0xCC #define ROOT_BUSES_OFFSET_2 0xD0 @@ -238,6 +240,32 @@ static const std::string spr_mcc_iio_stack_names[] = { "IIO Stack 10 - DMI ", }; +// MS2IOSF stack IDs in CHA notation +#define GRR_PCH_DSA_GEN4_SAD_ID 0 +#define GRR_DLB_SAD_ID 1 +#define GRR_NIS_QAT_SAD_ID 2 + +#define GRR_PCH_DSA_GEN4_PMON_ID 2 +#define GRR_DLB_PMON_ID 1 +#define GRR_NIS_QAT_PMON_ID 0 + +// Stack 0 contains PCH, DSA and CPU PCIe Gen4 Complex +const std::map grr_sad_to_pmu_id_mapping = { + { GRR_PCH_DSA_GEN4_SAD_ID, GRR_PCH_DSA_GEN4_PMON_ID }, + { GRR_DLB_SAD_ID, GRR_DLB_PMON_ID }, + { GRR_NIS_QAT_SAD_ID, GRR_NIS_QAT_PMON_ID }, +}; + +#define GRR_DLB_PART_ID 0 +#define GRR_NIS_PART_ID 0 +#define GRR_QAT_PART_ID 1 + +static const std::string grr_iio_stack_names[3] = { + "IIO Stack 0 - NIS/QAT ", + "IIO Stack 1 - HQM ", + "IIO Stack 2 - PCH/DSA/PCIe " +}; + #define EMR_DMI_PMON_ID 7 #define EMR_PCIE_GEN5_0_PMON_ID 1 #define EMR_PCIE_GEN5_1_PMON_ID 2 @@ -1441,6 +1469,176 @@ bool EagleStreamPlatformMapping::pciTreeDiscover(std::vector& iios) override; +}; + +bool LoganvillePlatform::loganvillePchDsaPciStackProbe(struct iio_stacks_on_socket& iio_on_socket, int root_bus, int stack_pmon_id) +{ + struct iio_stack stack; + stack.busno = root_bus; + stack.iio_unit_id = stack_pmon_id; + stack.stack_name = grr_iio_stack_names[stack_pmon_id]; + + struct iio_bifurcated_part pch_part; + pch_part.part_id = 7; + struct pci* pci_dev = &pch_part.root_pci_dev; + pci_dev->bdf.busno = root_bus; + + if (probe_pci(pci_dev)) { + probeDeviceRange(pch_part.child_pci_devs, pci_dev->bdf.domainno, pci_dev->secondary_bus_number, pci_dev->subordinate_bus_number); + stack.parts.push_back(pch_part); + iio_on_socket.stacks.push_back(stack); + return true; + } + + return false; +} + +bool 
LoganvillePlatform::loganvilleDlbStackProbe(struct iio_stacks_on_socket& iio_on_socket, int root_bus, int stack_pmon_id) +{ + struct iio_stack stack; + stack.busno = root_bus; + stack.iio_unit_id = stack_pmon_id; + stack.stack_name = grr_iio_stack_names[stack_pmon_id]; + + struct iio_bifurcated_part dlb_part; + dlb_part.part_id = GRR_DLB_PART_ID; + + for (uint8_t bus = root_bus; bus < 255; bus++) { + struct pci pci_dev(bus, 0x00, 0x00); + if (probe_pci(&pci_dev)) { + if ((pci_dev.vendor_id == PCM_INTEL_PCI_VENDOR_ID) && (pci_dev.device_id == HQMV25_DID)) { + dlb_part.root_pci_dev = pci_dev; + // Check Virtual RPs for DLB + for (uint8_t device = 0; device < 2; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev(bus, device, function); + if (probe_pci(&child_pci_dev)) { + dlb_part.child_pci_devs.push_back(child_pci_dev); + } + } + } + stack.parts.push_back(dlb_part); + iio_on_socket.stacks.push_back(stack); + return true; + } + } + } + + return false; +} + +bool LoganvillePlatform::loganvilleNacStackProbe(struct iio_stacks_on_socket& iio_on_socket, int root_bus, int stack_pmon_id) +{ + struct iio_stack stack; + stack.busno = root_bus; + stack.iio_unit_id = stack_pmon_id; + stack.stack_name = grr_iio_stack_names[stack_pmon_id]; + + // Probe NIS + { + struct iio_bifurcated_part nis_part; + nis_part.part_id = GRR_NIS_PART_ID; + struct pci pci_dev(root_bus, 0x04, 0x00); + if (probe_pci(&pci_dev)) { + nis_part.root_pci_dev = pci_dev; + for (int bus = pci_dev.secondary_bus_number; bus <= pci_dev.subordinate_bus_number; bus++) { // int counter: a uint8_t would wrap 255 -> 0 and never terminate when the subordinate bus is 0xFF + for (uint8_t device = 0; device < 2; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev(static_cast<uint8_t>(bus), device, function); + if (probe_pci(&child_pci_dev)) { + nis_part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(nis_part); + } + } + + // Probe QAT + { + struct iio_bifurcated_part qat_part; + qat_part.part_id = GRR_QAT_PART_ID; +
struct pci pci_dev(root_bus, 0x05, 0x00); + if (probe_pci(&pci_dev)) { + qat_part.root_pci_dev = pci_dev; + for (int bus = pci_dev.secondary_bus_number; bus <= pci_dev.subordinate_bus_number; bus++) { // int counter: a uint8_t would wrap 255 -> 0 and never terminate when the subordinate bus is 0xFF + for (uint8_t device = 0; device < 17; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev(static_cast<uint8_t>(bus), device, function); + if (probe_pci(&child_pci_dev)) { + qat_part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(qat_part); + } + } + + iio_on_socket.stacks.push_back(stack); + return true; +} + +bool LoganvillePlatform::pciTreeDiscover(std::vector& iios) +{ + std::map sad_id_bus_map; + if (!getSadIdRootBusMap(0, sad_id_bus_map)) { + return false; + } + + if (sad_id_bus_map.size() != grr_sad_to_pmu_id_mapping.size()) { + cerr << "Found unexpected number of stacks: " << sad_id_bus_map.size() << ", expected: " << grr_sad_to_pmu_id_mapping.size() << endl; + return false; + } + + struct iio_stacks_on_socket iio_on_socket; + iio_on_socket.socket_id = 0; + + for (auto sad_id_bus_pair = sad_id_bus_map.cbegin(); sad_id_bus_pair != sad_id_bus_map.cend(); ++sad_id_bus_pair) { + if (grr_sad_to_pmu_id_mapping.find(sad_id_bus_pair->first) == grr_sad_to_pmu_id_mapping.end()) { + cerr << "Cannot map SAD ID to PMON ID.
Unknown ID: " << sad_id_bus_pair->first << endl; + return false; + } + int stack_pmon_id = grr_sad_to_pmu_id_mapping.at(sad_id_bus_pair->first); + int root_bus = sad_id_bus_pair->second; + switch (stack_pmon_id) { + case GRR_PCH_DSA_GEN4_PMON_ID: + if (!loganvillePchDsaPciStackProbe(iio_on_socket, root_bus, stack_pmon_id)) { + return false; + } + break; + case GRR_DLB_PMON_ID: + if (!loganvilleDlbStackProbe(iio_on_socket, root_bus, stack_pmon_id)) { + return false; + } + break; + case GRR_NIS_QAT_PMON_ID: + if (!loganvilleNacStackProbe(iio_on_socket, root_bus, stack_pmon_id)) { + return false; + } + break; + default: + return false; + } + } + + std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); + + iios.push_back(iio_on_socket); + + return true; +} + void IPlatformMapping::probeDeviceRange(std::vector &pci_devs, int domain, int secondary, int subordinate) { for (uint8_t bus = secondary; int(bus) <= subordinate; bus++) { @@ -1699,6 +1897,8 @@ std::unique_ptr IPlatformMapping::getPlatformMapping(int cpu_f case PCM::SPR: case PCM::EMR: return std::unique_ptr{new EagleStreamPlatformMapping(cpu_family_model, sockets_count)}; + case PCM::GRR: + return std::unique_ptr{new LoganvillePlatform(cpu_family_model, sockets_count)}; case PCM::SRF: case PCM::GNR: return std::unique_ptr{new BirchStreamPlatform(cpu_family_model, sockets_count)}; @@ -1717,6 +1917,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case PCM::SNOWRIDGE: case PCM::SPR: case PCM::EMR: + case PCM::GRR: case PCM::SRF: case PCM::GNR: return new icx_ccr(ccr); diff --git a/src/pcm-memory.cpp b/src/pcm-memory.cpp index 5cd5d2a7..57d373d0 100644 --- a/src/pcm-memory.cpp +++ b/src/pcm-memory.cpp @@ -426,6 +426,7 @@ void printSocketBWFooter(PCM *m, uint32 no_columns, uint32 skt, const memdata_t if ( md->metrics == PartialWrites && m->getCPUFamilyModel() != PCM::SRF && m->getCPUFamilyModel() != PCM::GNR + && m->getCPUFamilyModel() != PCM::GRR ) { for (uint32 i=skt; i<(skt+no_columns); ++i) { @@ -735,6 
+736,7 @@ void display_bandwidth_csv(PCM *m, memdata_t *md, uint64 /*elapsedTime*/, const if ( md->metrics == PartialWrites && m->getCPUFamilyModel() != PCM::GNR && m->getCPUFamilyModel() != PCM::SRF + && m->getCPUFamilyModel() != PCM::GRR ) { choose(outputType, @@ -994,6 +996,7 @@ void calculate_bandwidth(PCM *m, switch (cpu_family_model) { case PCM::GNR: + case PCM::GRR: case PCM::SRF: reads += getMCCounter(channel, ServerUncorePMUs::EventPosition::READ2, uncState1[skt], uncState2[skt]); writes += getMCCounter(channel, ServerUncorePMUs::EventPosition::WRITE2, uncState1[skt], uncState2[skt]); @@ -1059,6 +1062,7 @@ void calculate_bandwidth(PCM *m, else if ( cpu_family_model != PCM::GNR && cpu_family_model != PCM::SRF + && cpu_family_model != PCM::GRR ) { md.partial_write[skt] += (uint64)(getMCCounter(channel, ServerUncorePMUs::EventPosition::PARTIAL, uncState1[skt], uncState2[skt]) / (elapsedTime / 1000.0)); diff --git a/src/pcm-pcie.cpp b/src/pcm-pcie.cpp index 69d1c380..d361d225 100644 --- a/src/pcm-pcie.cpp +++ b/src/pcm-pcie.cpp @@ -99,6 +99,8 @@ IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool p case PCM::GNR: case PCM::SRF: return new BirchStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); + case PCM::GRR: + return new LoganvillePlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SPR: case PCM::EMR: return new EagleStreamPlatform(m, csv, print_bandwidth, print_additional_info, delay); diff --git a/src/pcm-pcie.h b/src/pcm-pcie.h index 0021047c..6b27c299 100644 --- a/src/pcm-pcie.h +++ b/src/pcm-pcie.h @@ -474,6 +474,136 @@ uint64 BirchStreamPlatform::getWriteBw() return (writeBw * 64ULL); } +// GRR + +class LoganvillePlatform: public LegacyPlatform +{ +public: + LoganvillePlatform(PCM *m, bool csv, bool bandwidth, bool verbose, uint32 delay) : + LegacyPlatform( {"PCIRdCur", "ItoM", "ItoMCacheNear", "UCRdF", "WiL", "WCiL", "WCiLF"}, + { + {0xC8F3FE00000435, 0xC8F3FD00000435, 
0xCC43FE00000435, 0xCC43FD00000435}, + {0xCD43FE00000435, 0xCD43FD00000435, 0xC877DE00000135, 0xC87FDE00000135}, + {0xC86FFE00000135, 0xC867FE00000135,}, + }, + m, csv, bandwidth, verbose, delay) + { + }; + +private: + enum eventIdx { + PCIRdCur, + ItoM, + ItoMCacheNear, + UCRdF, + WiL, + WCiL, + WCiLF + }; + + enum Events { + PCIRdCur_miss, + PCIRdCur_hit, + ItoM_miss, + ItoM_hit, + ItoMCacheNear_miss, + ItoMCacheNear_hit, + UCRdF_miss, + WiL_miss, + WCiL_miss, + WCiLF_miss, + eventLast + }; + + virtual uint64 getReadBw(uint socket, eventFilter filter); + virtual uint64 getWriteBw(uint socket, eventFilter filter); + virtual uint64 getReadBw(); + virtual uint64 getWriteBw(); + virtual uint64 event(uint socket, eventFilter filter, uint idx); +}; + +uint64 LoganvillePlatform::event(uint socket, eventFilter filter, uint idx) +{ + uint64 event = 0; + switch (idx) + { + case PCIRdCur: + if (filter == TOTAL) + event = eventSample[socket][PCIRdCur_miss] + + eventSample[socket][PCIRdCur_hit]; + else if (filter == MISS) + event = eventSample[socket][PCIRdCur_miss]; + else if (filter == HIT) + event = eventSample[socket][PCIRdCur_hit]; + break; + case ItoM: + if (filter == TOTAL) + event = eventSample[socket][ItoM_miss] + + eventSample[socket][ItoM_hit]; + else if (filter == MISS) + event = eventSample[socket][ItoM_miss]; + else if (filter == HIT) + event = eventSample[socket][ItoM_hit]; + break; + case ItoMCacheNear: + if (filter == TOTAL) + event = eventSample[socket][ItoMCacheNear_miss] + + eventSample[socket][ItoMCacheNear_hit]; + else if (filter == MISS) + event = eventSample[socket][ItoMCacheNear_miss]; + else if (filter == HIT) + event = eventSample[socket][ItoMCacheNear_hit]; + break; + case UCRdF: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][UCRdF_miss]; + break; + case WiL: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][WiL_miss]; + break; + case WCiL: + if (filter == TOTAL || filter == MISS) + event = 
eventSample[socket][WCiL_miss]; + break; + case WCiLF: + if (filter == TOTAL || filter == MISS) + event = eventSample[socket][WCiLF_miss]; + break; + default: + break; + } + return event; +} + +uint64 LoganvillePlatform::getReadBw(uint socket, eventFilter filter) +{ + uint64 readBw = event(socket, filter, PCIRdCur); + return (readBw * 64ULL); +} + +uint64 LoganvillePlatform::getWriteBw(uint socket, eventFilter filter) +{ + uint64 writeBw = event(socket, filter, ItoM) + + event(socket, filter, ItoMCacheNear); + return (writeBw * 64ULL); +} +uint64 LoganvillePlatform::getReadBw() +{ + uint64 readBw = 0; + for (uint socket = 0; socket < m_socketCount; socket++) + readBw += (event(socket, TOTAL, PCIRdCur)); + return (readBw * 64ULL); +} + +uint64 LoganvillePlatform::getWriteBw() +{ + uint64 writeBw = 0; + for (uint socket = 0; socket < m_socketCount; socket++) + writeBw += (event(socket, TOTAL, ItoM) + + event(socket, TOTAL, ItoMCacheNear)); + return (writeBw * 64ULL); +} //SPR class EagleStreamPlatform: public LegacyPlatform diff --git a/src/types.h b/src/types.h index 09b24a06..f6f33ce2 100644 --- a/src/types.h +++ b/src/types.h @@ -1017,7 +1017,19 @@ constexpr auto BHS_UBOX_MSR_PMON_CTL1_ADDR = 0x3FF3; constexpr auto BHS_UBOX_MSR_PMON_CTR0_ADDR = 0x3FF8; constexpr auto BHS_UBOX_MSR_PMON_CTR1_ADDR = 0x3FF9; - +constexpr auto GRR_UCLK_FIXED_CTR_ADDR = 0x3F5F; +constexpr auto GRR_UCLK_FIXED_CTL_ADDR = 0x3F5E; +constexpr auto GRR_UBOX_MSR_PMON_BOX_CTL_ADDR = 0x3F50; +constexpr auto GRR_UBOX_MSR_PMON_CTL0_ADDR = 0x3F52; +constexpr auto GRR_UBOX_MSR_PMON_CTL1_ADDR = 0x3F53; +constexpr auto GRR_UBOX_MSR_PMON_CTR0_ADDR = 0x3F58; +constexpr auto GRR_UBOX_MSR_PMON_CTR1_ADDR = 0x3F59; + +constexpr auto GRR_M2IOSF_IIO_UNIT_CTL = 0x2900; +constexpr auto GRR_M2IOSF_IIO_CTR0 = 0x2908; +constexpr auto GRR_M2IOSF_IIO_CTL0 = 0x2902; +constexpr auto GRR_M2IOSF_REG_STEP = 0x10; +constexpr auto GRR_M2IOSF_NUM = 3; constexpr auto JKTIVT_UCLK_FIXED_CTR_ADDR = (0x0C09); constexpr auto 
JKTIVT_UCLK_FIXED_CTL_ADDR = (0x0C08); @@ -1247,6 +1259,14 @@ static const uint32 ICX_IIO_UNIT_CTL[] = { 0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00, 0x0B20 }; +static const uint32 GRR_IRP_UNIT_CTL[] = { + 0x2A00, + 0x2A10, + 0x2A20 +}; + +#define GRR_IRP_CTL_REG_OFFSET (0x0002) +#define GRR_IRP_CTR_REG_OFFSET (0x0008) static const uint32 BHS_IRP_UNIT_CTL[] = { 0x2A00, From 652c772ee9c6a490adb120554a9014ac634e5fed Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 6 Nov 2024 09:22:11 +0100 Subject: [PATCH 03/14] make print_help_force_rtm_abort_mode more robust Change-Id: I91e3b0249b98a217d6b985aedff21d17cdda5ad4 --- src/cpucounters.cpp | 2 +- src/cpucounters.h | 2 +- src/utils.cpp | 29 ++++++++++++++++++++++------- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 67b68e52..9d2443b0 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -4636,7 +4636,7 @@ void PCM::disableForceRTMAbortMode(const bool silent) } } -bool PCM::isForceRTMAbortModeAvailable() const +bool PCM::isForceRTMAbortModeAvailable() { PCM_CPUID_INFO info; pcm_cpuid(7, 0, info); // leaf 7, subleaf 0 diff --git a/src/cpucounters.h b/src/cpucounters.h index a5d39422..2e65b908 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -2405,7 +2405,7 @@ class PCM_API PCM void disableForceRTMAbortMode(const bool silent = false); //! \brief queries availability of "force all RTM transaction abort" mode - bool isForceRTMAbortModeAvailable() const; + static bool isForceRTMAbortModeAvailable(); //! 
\brief Get microcode level (returns -1 if retrieval not supported due to some restrictions) int64 getCPUMicrocodeLevel() const { return cpu_microcode_level; } diff --git a/src/utils.cpp b/src/utils.cpp index a131ecf7..d442a3b8 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -812,16 +812,31 @@ int calibratedSleep(const double delay, const char* sysCmd, const MainLoop& main void print_help_force_rtm_abort_mode(const int alignment, const char * separator) { - const auto m = PCM::getInstance(); - if (m->isForceRTMAbortModeAvailable() && (m->getMaxCustomCoreEvents() < 4)) + if (PCM::isForceRTMAbortModeAvailable() == false) + { + return; + } + try { - std::cout << " -force-rtm-abort-mode"; - for (int i = 0; i < (alignment - 23); ++i) + const auto m = PCM::getInstance(); + if (m->getMaxCustomCoreEvents() < 4) { - std::cout << " "; + std::cout << " -force-rtm-abort-mode"; + for (int i = 0; i < (alignment - 23); ++i) + { + std::cout << " "; + } + assert(separator); + std::cout << separator << " force RTM transaction abort mode to enable more programmable counters\n"; } - assert(separator); - std::cout << separator << " force RTM transaction abort mode to enable more programmable counters\n"; + } + catch (std::exception & e) + { + std::cerr << "ERROR: " << e.what() << "\n"; + } + catch (...) 
+ { + std::cerr << "ERROR: Unknown exception caught in print_help_force_rtm_abort_mode\n"; } } From ef33733ac4930135e88ab70efa91875ac694acc9 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 6 Nov 2024 09:30:54 +0100 Subject: [PATCH 04/14] handle invalid CXL PMU entries Change-Id: I621725d4d0e5300b3b1052d91f7d18e3887c845a --- src/cpucounters.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 9d2443b0..d0d7f545 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2858,7 +2858,14 @@ void PCM::initUncorePMUsDirect() uncorePMUDiscovery->getNumBoxes(SPR_CXLDP_BOX_TYPE, s)); for (size_t pos = 0; pos < n_units; ++pos) { - cxlPMUs[s].push_back(std::make_pair(createCXLPMU(s, SPR_CXLCM_BOX_TYPE, pos), createCXLPMU(s, SPR_CXLDP_BOX_TYPE, pos))); + try + { + cxlPMUs[s].push_back(std::make_pair(createCXLPMU(s, SPR_CXLCM_BOX_TYPE, pos), createCXLPMU(s, SPR_CXLDP_BOX_TYPE, pos))); + } + catch (const std::exception& e) + { + std::cerr << "CXL PMU initialization for socket " << s << " at position " << pos << " failed: " << e.what() << std::endl; + } } } break; From bb4903c186b13f913cb743f6446c771a713cf8f9 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 6 Nov 2024 09:36:18 +0100 Subject: [PATCH 05/14] print QAT warnings to cerr Change-Id: I21c759c0800a259b9084a385f9555b0ca3ff8d02 --- src/cpucounters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index d0d7f545..60e52ade 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2666,7 +2666,7 @@ void PCM::initUncorePMUsDirect() std::hex << devInfo.func << "/telemetry/control"; qatTLMCTLStr = readSysFS(qat_TLMCTL_sysfs_path.str().c_str(), true); if(!qatTLMCTLStr.size()){ - std::cout << "Warning: IDX - QAT telemetry feature of B:0x" << std::hex << devInfo.bus << ",D:0x" << devInfo.dev << ",F:0x" << devInfo.func \ + std::cerr << "Warning: IDX - QAT telemetry 
feature of B:0x" << std::hex << devInfo.bus << ",D:0x" << devInfo.dev << ",F:0x" << devInfo.func \ << " is NOT available, skipped." << std::dec << std::endl; continue; } From b095ad32af90b76225b9a37d36ced6055e2db89c Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 19 Nov 2024 08:56:47 +0100 Subject: [PATCH 06/14] enable libasan for tests Change-Id: If094f52a55e60be355a36314fcaf763c0f0e5d45 --- .github/workflows/ci-test.yml | 3 ++- tests/fuzz.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index b4c2bca7..40f4fdc6 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -28,12 +28,13 @@ jobs: run: | cmake --version rm -rf ${{ github.workspace }}/build - cmake -B ${{ github.workspace }}/build + cmake -B ${{ github.workspace }}/build -DPCM_NO_ASAN=OFF - name: Build run: | g++ --version cd ${{ github.workspace }}/build make -j$(nproc) + ldd bin/pcm - name: Test run: | set -o pipefail diff --git a/tests/fuzz.sh b/tests/fuzz.sh index 1b9d4062..2ca138c0 100644 --- a/tests/fuzz.sh +++ b/tests/fuzz.sh @@ -15,13 +15,14 @@ export LSAN_OPTIONS=suppressions="pcm_asan_suppression_file" echo "Running fuzz tests with running time multiplier $factor" -CC=`which clang` CXX=`which clang++` cmake .. -DCMAKE_BUILD_TYPE=Debug -DFUZZ=1 && mkdir -p corpus && +CC=`which clang` CXX=`which clang++` cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DFUZZ=1 -DPCM_NO_ASAN=OFF && mkdir -p corpus && make urltest-fuzz \ pcm-fuzz \ pcm-memory-fuzz \ pcm-sensor-server-fuzz \ pcm-sensor-server-ssl-fuzz \ -j && +ldd bin/tests/pcm-fuzz && rm -rf corpus/* && printf '%b' "GET / HTTP/1.1\r\nHost: localhost\r\nAccept: */*\r\n\r\n" > corpus/1 && printf '%b' "GET /metrics HTTP/1.1\r\nHost: localhost\r\nAccept: */*\r\n\r\n" > corpus/2 && From d964822883493f6a1c6d15936d18e19a8663c33a Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 19 Nov 2024 10:31:48 +0100 Subject: [PATCH 07/14] asan already enabled in fuzzing; use PCM_FUZZ Change-Id: I344c0860706e10d404d844da344aac89589850a6 --- CMakeLists.txt | 5 ++++- tests/CMakeLists.txt | 4 ++-- tests/fuzz.sh | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 776e3b4d..662e5bff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2022, Intel Corporation +# Copyright (c) 2022-2024, Intel Corporation cmake_minimum_required(VERSION 3.5) @@ -145,6 +145,9 @@ if(PCM_FUZZ) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FUZZER_OPTIONS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FUZZER_OPTIONS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FUZZER_OPTIONS}") + message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") + message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + message(STATUS "CMAKE_EXE_LINKER_FLAGS: ${CMAKE_EXE_LINKER_FLAGS}") endif(PCM_FUZZ) ####################### diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c7a507de..2ff9606c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2022, Intel Corporation +# Copyright (c) 2022-2024, Intel Corporation set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/tests) @@ -18,7 +18,7 @@ if(UNIX) endif(UNIX) -if(FUZZ) +if(PCM_FUZZ) find_package(OpenSSL REQUIRED) set(SSL_LIBS 
OpenSSL::SSL OpenSSL::Crypto) add_executable(urltest-fuzz urltest-fuzz.cpp) diff --git a/tests/fuzz.sh b/tests/fuzz.sh index 2ca138c0..d49b4075 100644 --- a/tests/fuzz.sh +++ b/tests/fuzz.sh @@ -15,7 +15,7 @@ export LSAN_OPTIONS=suppressions="pcm_asan_suppression_file" echo "Running fuzz tests with running time multiplier $factor" -CC=`which clang` CXX=`which clang++` cmake .. -DCMAKE_BUILD_TYPE=Debug -DFUZZ=1 -DPCM_NO_ASAN=OFF && mkdir -p corpus && +CC=`which clang` CXX=`which clang++` cmake .. -DCMAKE_BUILD_TYPE=Debug -DPCM_FUZZ=ON && mkdir -p corpus && make urltest-fuzz \ pcm-fuzz \ pcm-memory-fuzz \ From c0542fcd7549f406f8facfb8cb038949e5e8cf6e Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 20 Nov 2024 10:44:32 +0100 Subject: [PATCH 08/14] add ARL (Arrow Lake) support Change-Id: I0ecc9cfea908eec24442e7e2af78b5a738485f45 --- src/cpucounters.cpp | 18 +++++++++++++++++- src/cpucounters.h | 9 +++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 60e52ade..3b186164 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -724,6 +724,7 @@ void PCM::initCStateSupportTables() case RPL: case MTL: case LNL: + case ARL: case SNOWRIDGE: case ELKHART_LAKE: case JASPER_LAKE: @@ -803,6 +804,7 @@ void PCM::initCStateSupportTables() case RPL: case MTL: case LNL: + case ARL: case SNOWRIDGE: case ELKHART_LAKE: case JASPER_LAKE: @@ -1662,6 +1664,7 @@ bool PCM::detectNominalFrequency() || cpu_family_model == RPL || cpu_family_model == MTL || cpu_family_model == LNL + || cpu_family_model == ARL || cpu_family_model == SKX || cpu_family_model == ICX || cpu_family_model == SPR @@ -1926,6 +1929,7 @@ void PCM::initUncoreObjects() case RPL: // TGLClientBW works fine for RPL case MTL: // TGLClientBW works fine for MTL case LNL: // TGLClientBW works fine for LNL + case ARL: // TGLClientBW works fine for ARL clientBW = std::make_shared(); break; /* Disabled since ADLClientBW requires 2x multiplier for BW on 
top @@ -3334,6 +3338,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == RPL || model_ == MTL || model_ == LNL + || model_ == ARL || model_ == SKX || model_ == ICX || model_ == SPR @@ -3367,6 +3372,9 @@ bool PCM::checkModel() case CML_1: cpu_family_model = CML; break; + case ARL_1: + cpu_family_model = ARL; + break; case ICL_1: cpu_family_model = ICL; break; @@ -3510,7 +3518,12 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter canUsePerf = false; if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n"; } - if (canUsePerf && (cpu_family_model == ADL || cpu_family_model == RPL || cpu_family_model == MTL || cpu_family_model == LNL)) + if (canUsePerf && (cpu_family_model == ADL + || cpu_family_model == RPL + || cpu_family_model == MTL + || cpu_family_model == LNL + || cpu_family_model == ARL + )) { canUsePerf = false; if (!silent) std::cerr << "Linux kernel perf rejects an architectural event on your platform. 
Using direct PMU programming instead.\n"; @@ -3598,6 +3611,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter case RPL: case MTL: case LNL: + case ARL: LLCArchEventInit(hybridAtomEventDesc); hybridAtomEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; hybridAtomEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; @@ -4957,6 +4971,8 @@ const char * PCM::getUArchCodename(const int32 cpu_family_model_param) const return "Meteor Lake"; case LNL: return "Lunar Lake"; + case ARL: + return "Arrow Lake"; case SKX: if (cpu_family_model_param >= 0) { diff --git a/src/cpucounters.h b/src/cpucounters.h index 2e65b908..6dc8043f 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -1639,6 +1639,7 @@ class PCM_API PCM case RPL: case MTL: case LNL: + case ARL: if (topology[coreID].core_type == TopologyEntry::Atom) { return std::make_pair(OFFCORE_RESPONSE_0_EVTNR, event + 1); @@ -1655,6 +1656,7 @@ class PCM_API PCM case RPL: case MTL: case LNL: + case ARL: useGLCOCREvent = true; break; } @@ -1892,6 +1894,8 @@ class PCM_API PCM RPL_3 = PCM_CPU_FAMILY_MODEL(6, 0xbe), MTL = PCM_CPU_FAMILY_MODEL(6, 0xAA), LNL = PCM_CPU_FAMILY_MODEL(6, 0xBD), + ARL = PCM_CPU_FAMILY_MODEL(6, 197), + ARL_1 = PCM_CPU_FAMILY_MODEL(6, 198), BDX = PCM_CPU_FAMILY_MODEL(6, 79), KNL = PCM_CPU_FAMILY_MODEL(6, 87), SKL = PCM_CPU_FAMILY_MODEL(6, 94), @@ -2128,6 +2132,7 @@ class PCM_API PCM case MTL: return 6; case LNL: + case ARL: return 12; case SNOWRIDGE: case ELKHART_LAKE: @@ -2477,6 +2482,7 @@ class PCM_API PCM || cpu_family_model == PCM::RPL || cpu_family_model == PCM::MTL || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::ARL || cpu_family_model == PCM::SPR || cpu_family_model == PCM::EMR || cpu_family_model == PCM::GNR @@ -2789,6 +2795,7 @@ class PCM_API PCM || cpu_family_model == RPL || cpu_family_model == MTL || cpu_family_model == LNL + || cpu_family_model == ARL || useSKLPath() ; } @@ -4298,6 +4305,7 @@ uint64 
getL2CacheMisses(const CounterStateType & before, const CounterStateType || cpu_family_model == PCM::RPL || cpu_family_model == PCM::MTL || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::ARL ) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } @@ -4413,6 +4421,7 @@ uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateTy || cpu_family_model == PCM::RPL || cpu_family_model == PCM::MTL || cpu_family_model == PCM::LNL + || cpu_family_model == PCM::ARL ) { const int64 misses = getL3CacheMisses(before, after); From 2fdf0ef0933f8f39e86cbf78160ba60e976a5ffa Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 25 Nov 2024 15:39:55 +0100 Subject: [PATCH 09/14] extend MMIORange API with core affinity Change-Id: I5914ff091936d491e2ecdc885e6135c1f32e57c4 --- src/mmio.cpp | 37 +++++++++++++++++++++++++++++-------- src/mmio.h | 32 +++++++++++++++++++++++++++++--- src/pcm-mmio.cpp | 41 ++++++++++++++++++++++++++++++++--------- src/utils.h | 2 +- 4 files changed, 91 insertions(+), 21 deletions(-) diff --git a/src/mmio.cpp b/src/mmio.cpp index 9e4c406c..5d79905d 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -87,7 +87,7 @@ WinPmemMMIORange::WinPmemMMIORange(uint64 baseAddr_, uint64 /* size_ */, bool re mutex.unlock(); } -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent, const int core) { auto hDriver = openMSRDriver(); if (hDriver != INVALID_HANDLE_VALUE) @@ -98,7 +98,7 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent CloseHandle(hDriver); if (status == TRUE && reslength == sizeof(uint64) && result == 1) { - impl = std::make_shared(baseAddr_, size_, readonly_); + impl = std::make_shared(baseAddr_, size_, readonly_, core); return; } else @@ -109,11 +109,18 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, 
bool readonly_, bool silent } } } - + if (core >= 0) + { + throw std::runtime_error("WinPmem does not support core affinity"); + } impl = std::make_shared(baseAddr_, size_, readonly_); } -OwnMMIORange::OwnMMIORange(uint64 baseAddr_, uint64 size_, bool /* readonly_ */) +OwnMMIORange::OwnMMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool /* readonly_ */, + const int core_) : + core(core_) { hDriver = openMSRDriver(); MMAP_Request req{}; @@ -132,20 +139,24 @@ OwnMMIORange::OwnMMIORange(uint64 baseAddr_, uint64 size_, bool /* readonly_ */) uint32 OwnMMIORange::read32(uint64 offset) { + CoreAffinityScope _(core); return *((uint32*)(mmapAddr + offset)); } uint64 OwnMMIORange::read64(uint64 offset) { + CoreAffinityScope _(core); return *((uint64*)(mmapAddr + offset)); } void OwnMMIORange::write32(uint64 offset, uint32 val) { + CoreAffinityScope _(core); *((uint32*)(mmapAddr + offset)) = val; } void OwnMMIORange::write64(uint64 offset, uint64 val) { + CoreAffinityScope _(core); *((uint64*)(mmapAddr + offset)) = val; } @@ -164,10 +175,15 @@ OwnMMIORange::~OwnMMIORange() #include "PCIDriverInterface.h" -MMIORange::MMIORange(uint64 physical_address, uint64 size_, bool, bool silent) : +MMIORange::MMIORange(const uint64 physical_address, const uint64 size_, const bool, const bool silent, const int core_) : mmapAddr(NULL), - size(size_) + size(size_), + core(core_) { + if (core_ >= 0) + { + throw std::runtime_error("MMIORange on MacOSX does not support core affinity"); + } if (size > 4096) { if (!silent) @@ -211,11 +227,12 @@ MMIORange::~MMIORange() #elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) -MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent) : +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent, const int core_) : fd(-1), mmapAddr(NULL), size(size_), - readonly(readonly_) + readonly(readonly_), + core(core_) { const int oflag = readonly ? 
O_RDONLY : O_RDWR; int handle = ::open("/dev/mem", oflag); @@ -252,16 +269,19 @@ MMIORange::MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_, bool silent uint32 MMIORange::read32(uint64 offset) { + CoreAffinityScope _(core); return *((uint32 *)(mmapAddr + offset)); } uint64 MMIORange::read64(uint64 offset) { + CoreAffinityScope _(core); return *((uint64 *)(mmapAddr + offset)); } void MMIORange::write32(uint64 offset, uint32 val) { + CoreAffinityScope _(core); if (readonly) { std::cerr << "PCM Error: attempting to write to a read-only MMIORange\n"; @@ -271,6 +291,7 @@ void MMIORange::write32(uint64 offset, uint32 val) } void MMIORange::write64(uint64 offset, uint64 val) { + CoreAffinityScope _(core); if (readonly) { std::cerr << "PCM Error: attempting to write to a read-only MMIORange\n"; diff --git a/src/mmio.h b/src/mmio.h index 2d42535b..b24813f1 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -22,10 +22,23 @@ #endif #include "mutex.h" +#include "utils.h" #include namespace pcm { + class CoreAffinityScope // sets core affinity if core >= 0, nop otherwise + { + std::shared_ptr affinity{nullptr}; + CoreAffinityScope(const CoreAffinityScope&) = delete; + CoreAffinityScope& operator = (const CoreAffinityScope&) = delete; + public: + CoreAffinityScope(const int core) + : affinity((core >= 0) ? 
std::make_shared(core) : nullptr) + { + } + }; + #ifdef _MSC_VER class MMIORangeInterface @@ -98,10 +111,14 @@ class OwnMMIORange : public MMIORangeInterface { HANDLE hDriver; char * mmapAddr; + const int core; OwnMMIORange(const OwnMMIORange&) = delete; OwnMMIORange& operator = (const OwnMMIORange&) = delete; public: - OwnMMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true); + OwnMMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const int core_ = -1); uint32 read32(uint64 offset); uint64 read64(uint64 offset); void write32(uint64 offset, uint32 val); @@ -115,7 +132,11 @@ class MMIORange MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); + MMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const bool silent = false, + const int core = -1); uint32 read32(uint64 offset) { return impl->read32(offset); @@ -146,10 +167,15 @@ class MMIORange #ifndef __APPLE__ const bool readonly; #endif + const int core; MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: - MMIORange(uint64 baseAddr_, uint64 size_, bool readonly_ = true, bool silent = false); + MMIORange( const uint64 baseAddr_, + const uint64 size_, + const bool readonly_ = true, + const bool silent = false, + const int core_ = -1); uint32 read32(uint64 offset); uint64 read64(uint64 offset); void write32(uint64 offset, uint32 val); diff --git a/src/pcm-mmio.cpp b/src/pcm-mmio.cpp index bc720d18..ba942dc6 100644 --- a/src/pcm-mmio.cpp +++ b/src/pcm-mmio.cpp @@ -25,20 +25,35 @@ using namespace pcm; void print_usage(const char* progname) { - std::cout << "Usage " << progname << " [-w value] [-q] [-d] address\n\n"; + std::cout << "Usage " << progname << " [-w value] [-q] [-d] [-c core] address\n\n"; std::cout << " Reads/writes MMIO (memory mapped) 
register in the specified address\n"; std::cout << " -w value : write the value before reading \n"; std::cout << " -b low:high : read or write only low..high bits of the register\n"; std::cout << " -q : read/write 64-bit quad word (default is 32-bit double word)\n"; std::cout << " -d : output all numbers in dec (default is hex)\n"; std::cout << " -n size : number of bytes read from specified address(batch read mode), max bytes=" << MAX_BATCH_OPERATE_BYTES << "\n"; + std::cout << " -c core : perform the operation from specified core\n"; std::cout << " --version : print application version\n"; std::cout << "\n"; } template -void doOp(const std::pair & bits, const uint64 address, const uint64 offset, const uint32 batch_bytes, const bool write, T value, RD readOp, WR writeOp, const bool dec) +void doOp( const std::pair & bits, + const uint64 address, const uint64 offset, + const uint32 batch_bytes, const bool write, + T value, + RD readOp, + WR writeOp, + const bool dec, + const int core) { + auto printCoreEndl = [&]() { + if (core >= 0) + { + std::cout << " on core " << core; + } + std::cout << "\n\n"; + }; if (batch_bytes == 0) //single mode { if (!dec) std::cout << std::hex << std::showbase; @@ -55,13 +70,15 @@ void doOp(const std::pair & bits, const uint64 address, const uint6 extractBitsPrintHelper(bits, value, dec); std::cout << " from " << std::dec << bit; if (!dec) std::cout << std::hex << std::showbase; - std::cout << "-bit MMIO register " << address << "\n\n"; + std::cout << "-bit MMIO register " << address; + printCoreEndl(); } else //batch mode { uint32 i = 0, j= 0; - std::cout << std::hex << " Dumping MMIO register range from 0x" << address << - ", number of bytes=0x" << batch_bytes << "\n\n"; + std::cout << std::hex << " Dumping MMIO register range from 0x" << address << + ", number of bytes=0x" << batch_bytes; + printCoreEndl(); for(i = 0; i < batch_bytes; i+=MAX_BATCH_READ_ROW_DISPLAY_BYTES) { std::ostringstream row_disp_str(std::ostringstream::out); @@ 
-95,9 +112,10 @@ int mainThrows(int argc, char * argv[]) bool quad = false; uint32 batch_bytes = 0; std::pair bits{-1, -1}; + int core = -1; int my_opt = -1; - while ((my_opt = getopt(argc, argv, "w:dqn:b:")) != -1) + while ((my_opt = getopt(argc, argv, "w:dqn:b:c:")) != -1) { switch (my_opt) { @@ -121,6 +139,9 @@ int mainThrows(int argc, char * argv[]) batch_bytes = MAX_BATCH_OPERATE_BYTES; } break; + case 'c': + core = read_number(optarg); + break; default: print_usage(argv[0]); return -1; @@ -151,16 +172,18 @@ int mainThrows(int argc, char * argv[]) batch_bytes = (rangeSize - offset); //limit the boundary } - MMIORange mmio(baseAddr, rangeSize, !write); + MMIORange mmio(baseAddr, rangeSize, !write, false, core); using namespace std::placeholders; if (quad) { - doOp(bits, address, offset, batch_bytes, write, (uint64)value, std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint64)value, + std::bind(&MMIORange::read64, &mmio, _1), std::bind(&MMIORange::write64, &mmio, _1, _2), dec, core); } else { - doOp(bits, address, offset, batch_bytes, write, (uint32)value, std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec); + doOp(bits, address, offset, batch_bytes, write, (uint32)value, + std::bind(&MMIORange::read32, &mmio, _1), std::bind(&MMIORange::write32, &mmio, _1, _2), dec, core); } } catch (std::exception & e) diff --git a/src/utils.h b/src/utils.h index 63f7e570..6d962559 100644 --- a/src/utils.h +++ b/src/utils.h @@ -678,7 +678,7 @@ void restrictDriverAccessNative(LPCTSTR path); std::vector findPathsFromPattern(const char* pattern); #endif -class TemporalThreadAffinity // speedup trick for Linux, FreeBSD, DragonFlyBSD, Windows +class TemporalThreadAffinity { TemporalThreadAffinity(); // forbidden #if defined(__FreeBSD__) || (defined(__DragonFly__) && __DragonFly_version >= 400707) From 904d4315d406fe68a0f18fc7db68e250908240cb Mon 
Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 26 Nov 2024 12:12:50 +0100 Subject: [PATCH 10/14] implement warnAlignment Change-Id: I66cab5e117af6b3a021c4b901593eff1eb9b4f8b --- src/mmio.cpp | 15 ++++++++++++--- src/mmio.h | 10 ++++++++-- src/pci.cpp | 21 ++++++++++++++++++++- src/utils.h | 9 +++++++++ 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/mmio.cpp b/src/mmio.cpp index 5d79905d..bceba05d 100644 --- a/src/mmio.cpp +++ b/src/mmio.cpp @@ -87,7 +87,8 @@ WinPmemMMIORange::WinPmemMMIORange(uint64 baseAddr_, uint64 /* size_ */, bool re mutex.unlock(); } -MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent, const int core) +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent_, const int core) : + silent(silent_) { auto hDriver = openMSRDriver(); if (hDriver != INVALID_HANDLE_VALUE) @@ -175,9 +176,10 @@ OwnMMIORange::~OwnMMIORange() #include "PCIDriverInterface.h" -MMIORange::MMIORange(const uint64 physical_address, const uint64 size_, const bool, const bool silent, const int core_) : +MMIORange::MMIORange(const uint64 physical_address, const uint64 size_, const bool, const bool silent_, const int core_) : mmapAddr(NULL), size(size_), + silent(silent_), core(core_) { if (core_ >= 0) @@ -199,6 +201,7 @@ MMIORange::MMIORange(const uint64 physical_address, const uint64 size_, const bo uint32 MMIORange::read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); uint32 val = 0; PCIDriver_readMemory32((uint8_t *)mmapAddr + offset, &val); return val; @@ -206,6 +209,7 @@ uint32 MMIORange::read32(uint64 offset) uint64 MMIORange::read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); uint64 val = 0; PCIDriver_readMemory64((uint8_t *)mmapAddr + offset, &val); return val; @@ -227,11 +231,12 @@ MMIORange::~MMIORange() #elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) 
-MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent, const int core_) : +MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool readonly_, const bool silent_, const int core_) : fd(-1), mmapAddr(NULL), size(size_), readonly(readonly_), + silent(silent_), core(core_) { const int oflag = readonly ? O_RDONLY : O_RDWR; @@ -269,18 +274,21 @@ MMIORange::MMIORange(const uint64 baseAddr_, const uint64 size_, const bool read uint32 MMIORange::read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); CoreAffinityScope _(core); return *((uint32 *)(mmapAddr + offset)); } uint64 MMIORange::read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); CoreAffinityScope _(core); return *((uint64 *)(mmapAddr + offset)); } void MMIORange::write32(uint64 offset, uint32 val) { + warnAlignment<4>("MMIORange::write32", silent, offset); CoreAffinityScope _(core); if (readonly) { @@ -291,6 +299,7 @@ void MMIORange::write32(uint64 offset, uint32 val) } void MMIORange::write64(uint64 offset, uint64 val) { + warnAlignment<8>("MMIORange::write64", silent, offset); CoreAffinityScope _(core); if (readonly) { diff --git a/src/mmio.h b/src/mmio.h index b24813f1..d2c10281 100644 --- a/src/mmio.h +++ b/src/mmio.h @@ -129,28 +129,33 @@ class OwnMMIORange : public MMIORangeInterface class MMIORange { std::shared_ptr impl; + const bool silent; MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; public: MMIORange( const uint64 baseAddr_, const uint64 size_, const bool readonly_ = true, - const bool silent = false, + const bool silent_ = false, const int core = -1); uint32 read32(uint64 offset) { + warnAlignment<4>("MMIORange::read32", silent, offset); return impl->read32(offset); } uint64 read64(uint64 offset) { + warnAlignment<8>("MMIORange::read64", silent, offset); return impl->read64(offset); } void write32(uint64 offset, uint32 val) { + 
warnAlignment<4>("MMIORange::write32", silent, offset); impl->write32(offset, val); } void write64(uint64 offset, uint64 val) { + warnAlignment<8>("MMIORange::write64", silent, offset); impl->write64(offset, val); } }; @@ -167,6 +172,7 @@ class MMIORange #ifndef __APPLE__ const bool readonly; #endif + const bool silent; const int core; MMIORange(const MMIORange &) = delete; MMIORange & operator = (const MMIORange &) = delete; @@ -174,7 +180,7 @@ class MMIORange MMIORange( const uint64 baseAddr_, const uint64 size_, const bool readonly_ = true, - const bool silent = false, + const bool silent_ = false, const int core_ = -1); uint32 read32(uint64 offset); uint64 read64(uint64 offset); diff --git a/src/pci.cpp b/src/pci.cpp index f2e30306..50fd9b27 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -25,9 +25,10 @@ #include "Winmsrdriver\msrstruct.h" #include "winring0/OlsDef.h" #include "winring0/OlsApiInitExt.h" -#include "utils.h" #endif +#include "utils.h" + #if defined (__FreeBSD__) || defined(__DragonFly__) #include #endif @@ -83,6 +84,7 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -113,6 +115,7 @@ int32 PciHandle::read32(uint64 offset, uint32 * value) int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -139,6 +142,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandle::read64", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -208,18 +212,21 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", 
false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_read32(pci_address, value); } int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_write32(pci_address, value); } int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandle::read64", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_read64(pci_address, value); } @@ -289,6 +296,7 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); struct pci_io pi; int ret; @@ -308,6 +316,7 @@ int32 PciHandle::read32(uint64 offset, uint32 * value) int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset) struct pci_io pi; int ret; @@ -327,6 +336,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandle::read64", false, offset); struct pci_io pi; int32 ret; @@ -415,16 +425,19 @@ bool PciHandle::exists(uint32 groupnr_, uint32 bus_, uint32 device_, uint32 func int32 PciHandle::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandle::read32", false, offset); return ::pread(fd, (void *)value, sizeof(uint32), offset); } int32 PciHandle::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandle::write32", false, offset); return ::pwrite(fd, (const void *)&value, sizeof(uint32), offset); } int32 PciHandle::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandle::read64", false, offset); size_t res = ::pread(fd, (void *)value, sizeof(uint64), offset); if(res != sizeof(uint64)) { @@ -532,16 +545,19 @@ bool 
PciHandleM::exists(uint32 /*groupnr_*/, uint32 /* bus_*/, uint32 /* device_ int32 PciHandleM::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandleM::read32", false, offset); return ::pread(fd, (void *)value, sizeof(uint32), offset + base_addr); } int32 PciHandleM::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandleM::write32", false, offset); return ::pwrite(fd, (const void *)&value, sizeof(uint32), offset + base_addr); } int32 PciHandleM::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandleM::read64", false, offset); return ::pread(fd, (void *)value, sizeof(uint64), offset + base_addr); } @@ -682,6 +698,7 @@ bool PciHandleMM::exists(uint32 /*groupnr_*/, uint32 /*bus_*/, uint32 /*device_* int32 PciHandleMM::read32(uint64 offset, uint32 * value) { + warnAlignment<4>("PciHandleMM::read32", false, offset); *value = *((uint32 *)(mmapAddr + offset)); return sizeof(uint32); @@ -689,6 +706,7 @@ int32 PciHandleMM::read32(uint64 offset, uint32 * value) int32 PciHandleMM::write32(uint64 offset, uint32 value) { + warnAlignment<4>("PciHandleMM::write32", false, offset); *((uint32 *)(mmapAddr + offset)) = value; return sizeof(uint32); @@ -696,6 +714,7 @@ int32 PciHandleMM::write32(uint64 offset, uint32 value) int32 PciHandleMM::read64(uint64 offset, uint64 * value) { + warnAlignment<8>("PciHandleMM::read64", false, offset); read32(offset, (uint32 *)value); read32(offset + sizeof(uint32), ((uint32 *)value) + 1); diff --git a/src/utils.h b/src/utils.h index 6d962559..3d7665fc 100644 --- a/src/utils.h +++ b/src/utils.h @@ -248,6 +248,15 @@ inline std::string unit_format(IntType n) void print_cpu_details(); +template +inline void warnAlignment(const char* call, const bool silent, const uint64 offset) +{ + if (silent == false && (offset % Bytes) != 0) + { + std::cerr << "PCM Warning: " << call << " offset " << offset << " is not " << Bytes << "-byte aligned\n"; + } +} + #define PCM_UNUSED(x) (void)(x) #define 
PCM_COMPILE_ASSERT(condition) \ From 59d5335ccc6244d9ec8ef95ae9e61bceeaffeb41 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 26 Nov 2024 12:51:41 +0100 Subject: [PATCH 11/14] add printDebugCallstack() function Change-Id: Idc8aeebfbd08faff254ecf224f907f70a42ff3c7 --- src/utils.cpp | 20 ++++++++++++-------- src/utils.h | 13 +++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index d442a3b8..7fe9b998 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -330,16 +330,11 @@ void sigINT_handler(int signum) } } -/** - * \brief handles SIGSEGV signals that lead to termination of the program - * this function specifically works when the client application launched - * by pcm -- terminates - */ constexpr auto BACKTRACE_MAX_STACK_FRAME = 30; -void sigSEGV_handler(int signum) +void printBacktrace() { - void *backtrace_buffer[BACKTRACE_MAX_STACK_FRAME] = {0}; - char **backtrace_strings = NULL; + void* backtrace_buffer[BACKTRACE_MAX_STACK_FRAME] = { 0 }; + char** backtrace_strings = NULL; size_t backtrace_size = 0; backtrace_size = backtrace(backtrace_buffer, BACKTRACE_MAX_STACK_FRAME); @@ -357,7 +352,16 @@ void sigSEGV_handler(int signum) } freeAndNullify(backtrace_strings); } +} +/** + * \brief handles SIGSEGV signals that lead to termination of the program + * this function specifically works when the client application launched + * by pcm -- terminates + */ +void sigSEGV_handler(int signum) +{ + printBacktrace(); sigINT_handler(signum); } diff --git a/src/utils.h b/src/utils.h index 3d7665fc..11e7dd41 100644 --- a/src/utils.h +++ b/src/utils.h @@ -140,6 +140,7 @@ void set_signal_handlers(void); void set_real_time_priority(const bool & silent); void restore_signal_handlers(void); #ifndef _MSC_VER +void printBacktrace(); void sigINT_handler(int signum); void sigHUP_handler(int signum); void sigUSR_handler(int signum); @@ -248,12 +249,24 @@ inline std::string unit_format(IntType n) void print_cpu_details(); + 
+inline void printDebugCallstack() +{ +#ifndef _MSC_VER + if (safe_getenv("PCM_PRINT_DEBUG_CALLSTACK") == "1") + { + printBacktrace(); + } +#endif +} + template inline void warnAlignment(const char* call, const bool silent, const uint64 offset) { if (silent == false && (offset % Bytes) != 0) { std::cerr << "PCM Warning: " << call << " offset " << offset << " is not " << Bytes << "-byte aligned\n"; + printDebugCallstack(); } } From 53a2471d111daa972057b45667d1428f6e57a444 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 26 Nov 2024 13:43:47 +0100 Subject: [PATCH 12/14] alignment of 4 is enough for PciHandle API Change-Id: Ibf7f4d9918a098f1f1070bb7c7e15afd08cb624a --- src/pci.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pci.cpp b/src/pci.cpp index 50fd9b27..44f25efa 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -142,7 +142,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandle::read64", false, offset); + warnAlignment<4>("PciHandle::read64", false, offset); if (hDriver != INVALID_HANDLE_VALUE) { PCICFG_Request req; @@ -226,7 +226,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandle::read64", false, offset); + warnAlignment<4>("PciHandle::read64", false, offset); uint32_t pci_address = FORM_PCI_ADDR(bus, device, function, (uint32_t)offset); return PCIDriver_read64(pci_address, value); } @@ -336,7 +336,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandle::read64", false, offset); + warnAlignment<4>("PciHandle::read64", false, offset); struct pci_io pi; int32 ret; @@ -437,7 +437,7 @@ int32 PciHandle::write32(uint64 offset, uint32 value) int32 PciHandle::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandle::read64", false, 
offset); + warnAlignment<4>("PciHandle::read64", false, offset); size_t res = ::pread(fd, (void *)value, sizeof(uint64), offset); if(res != sizeof(uint64)) { @@ -557,7 +557,7 @@ int32 PciHandleM::write32(uint64 offset, uint32 value) int32 PciHandleM::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandleM::read64", false, offset); + warnAlignment<4>("PciHandleM::read64", false, offset); return ::pread(fd, (void *)value, sizeof(uint64), offset + base_addr); } @@ -714,7 +714,7 @@ int32 PciHandleMM::write32(uint64 offset, uint32 value) int32 PciHandleMM::read64(uint64 offset, uint64 * value) { - warnAlignment<8>("PciHandleMM::read64", false, offset); + warnAlignment<4>("PciHandleMM::read64", false, offset); read32(offset, (uint32 *)value); read32(offset + sizeof(uint32), ((uint32 *)value) + 1); From b0428a3db230dbcefb811e27c900c348dd0c9b8f Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Tue, 26 Nov 2024 12:38:25 +0100 Subject: [PATCH 13/14] fix misaligned read --- src/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pci.h b/src/pci.h index ec44e99f..d7c03264 100644 --- a/src/pci.h +++ b/src/pci.h @@ -261,8 +261,8 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) // std::cerr << "Intel device scan. found " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << std::dec; uint32 status{0}; PciHandleType h(group, bus, device, function); - h.read32(6, &status); // read status - if (status & 0x10) // has capability list + h.read32(4, &status); // read status + if (status & 0x100000) // has capability list { // std::cerr << "Intel device scan. 
found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; VSEC header; From 9b726470ddb7c9407000d97d0ca55433da2dc03f Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 27 Nov 2024 08:37:07 +0100 Subject: [PATCH 14/14] fix compilation Change-Id: Ia0d3373f5d0c544a55cec49479c81259dbd2c080 --- src/pci.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pci.cpp b/src/pci.cpp index 44f25efa..d7f485df 100644 --- a/src/pci.cpp +++ b/src/pci.cpp @@ -316,7 +316,7 @@ int32 PciHandle::read32(uint64 offset, uint32 * value) int32 PciHandle::write32(uint64 offset, uint32 value) { - warnAlignment<4>("PciHandle::write32", false, offset) + warnAlignment<4>("PciHandle::write32", false, offset); struct pci_io pi; int ret;