diff --git a/.github/workflows/ci-cov-linux-report.yml b/.github/workflows/ci-cov-linux-report.yml
index 03f4d809..1d15abac 100644
--- a/.github/workflows/ci-cov-linux-report.yml
+++ b/.github/workflows/ci-cov-linux-report.yml
@@ -37,7 +37,7 @@ jobs:
         ci-cov-linux-report.sh PCM.linux.and.python
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: coverity-linux-and-python-report-${{ github.sha }}
         path: "*-Report.pdf"
diff --git a/.github/workflows/ci-cov-windows-report.yml b/.github/workflows/ci-cov-windows-report.yml
index 11338e72..9d4d871d 100644
--- a/.github/workflows/ci-cov-windows-report.yml
+++ b/.github/workflows/ci-cov-windows-report.yml
@@ -48,7 +48,7 @@ jobs:
         c:\pcm\ci-cov-windows-report.ps1 PCM.windows-all
 
     - name: upload-artifact
-      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: coverity-windows-all-report-${{ github.sha }}
         path: "*-Report.pdf"
diff --git a/.github/workflows/ci-gcc5.yml b/.github/workflows/ci-gcc5.yml
index b9682c44..109d620c 100644
--- a/.github/workflows/ci-gcc5.yml
+++ b/.github/workflows/ci-gcc5.yml
@@ -43,7 +43,7 @@ jobs:
         cd ${{ github.workspace }}/build
         make install -j$(nproc)
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: PCMforLinuxGCC5
         path: build/bin/*
diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
index bc0fe729..b18a7289 100644
--- a/.github/workflows/ci-test.yml
+++ b/.github/workflows/ci-test.yml
@@ -40,55 +40,55 @@ jobs:
         sh ${{ github.workspace }}/tests/test.sh 2>&1 | tee test-log.txt
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-${{ github.sha }}
         path: test-log.txt
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-tr-wo_ext-${{ github.sha }}
         path: build/bin/raw_tr_wo_ext.csv
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-tr-wi_ext-${{ github.sha }}
         path: build/bin/raw_tr_wi_ext.csv
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: pcm-csv-${{ github.sha }}
         path: build/bin/pcm.csv
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: pcm-memory-csv-${{ github.sha }}
         path: build/bin/pcm-memory.csv
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-tr-wi_ext-single_header-${{ github.sha }}
         path: build/bin/raw_tr_wi_ext_single_header.csv
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-edp-${{ github.sha }}
         path: build/bin/raw_edp.txt
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-json-${{ github.sha }}
         path: build/bin/raw_json.json
 
     - name: upload-artifact
-      uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # v3.1.1
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: test-log-raw-edp-offlined-cores-${{ github.sha }}
         path: build/bin/raw_edp_offlined_cores.txt
diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
index 8d6644d7..9854bb3a 100644
--- a/.github/workflows/ci-windows.yml
+++ b/.github/workflows/ci-windows.yml
@@ -41,7 +41,7 @@ jobs:
         chdir ${{github.workspace}}\src\WinMSRDriver
         msbuild MSR.vcxproj /p:Configuration=Release,Platform=x64 /t:Clean,Build /m
     - name: upload-artifact
-      uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
+      uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
       with:
         name: PCMforWindows
         path: build/bin/**/*
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index f621a959..5e6f50bb 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -50,7 +50,7 @@ jobs:
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
+        uses: github/codeql-action/init@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3
         with:
           languages: ${{ matrix.language }}
           # If you wish to specify custom queries, you can do so here or in a config file.
@@ -60,7 +60,7 @@ jobs:
       # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
       # If this step fails, then you should remove it and run the build manually (see below)
       - name: Autobuild
-        uses: github/codeql-action/autobuild@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
+        uses: github/codeql-action/autobuild@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3
 
       # ℹ️ Command-line programs to run using the OS shell.
       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
@@ -73,6 +73,6 @@ jobs:
       #   ./location_of_script_within_repo/buildscript.sh
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
+        uses: github/codeql-action/analyze@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3
         with:
           category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index f4a8bb47..e82880c5 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -26,4 +26,4 @@ jobs:
       - name: 'Checkout Repository'
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - name: 'Dependency Review'
-        uses: actions/dependency-review-action@c74b580d73376b7750d3d2a50bfb8adc2c937507 # v3.1.5
+        uses: actions/dependency-review-action@9129d7d40b8c12c1ed0f60400d00c92d437adcce # v4.1.3
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index f5c4a211..0e0a81b3 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -37,7 +37,7 @@ jobs:
         uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
 
       - name: Cache Docker layers
-        uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2
+        uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
         with:
           path: /tmp/.buildx-cache
           key: ${{ runner.os }}-buildx-${{ github.sha }}
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index ccc14d78..78efbce2 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -65,7 +65,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
         with:
           name: SARIF file
           path: results.sarif
@@ -73,6 +73,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12
+        uses: github/codeql-action/upload-sarif@379614612a29c9e28f31f39a59013eb8012a51f0 # v3.24.3
         with:
           sarif_file: results.sarif
diff --git a/doc/FAQ.md b/doc/FAQ.md
index cd51f5ff..88a0266d 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -86,3 +86,10 @@ Not all AWS instances allow users to collect CPU telemetry by exposing PMU to th
 * Bare metal instances: allow collection of CPU metrics from both core (e.g. instructions per cycle, cache misses) and uncore (e.g. memory controller, UPI)
 * Full-socket (single socket, two socket, etc) virtualized instances: e.g. m5d.12xlarge, m5.24xlarge, m5.12xlarge. Only core CPU metrics are exposed, and certain CPU performance events are forbidden (e.g. offcore response events, events collecting “any_thread” information). “arch_perfmon” flag in /proc/cpuinfo indicates if the core CPU metrics are exposed (example: https://instaguide.io/info.html?type=m5.12xlarge ). The mechanism of PMU virtualization is commonly known as vPMU.
 
+## Q12
+
+pcm-pcie reports that the CPU is not supported: "Jaketown, Ivytown, Haswell, Broadwell-DE, Skylake, Icelake, Snowridge and Sapphirerapids Server CPU is required for this tool! Program aborted"
+Can you add support for pcm-pcie for my CPU?
+
+Answer: most likely you have a client CPU that does not have the required hardware performance monitoring units. pcm-pcie cannot work without them.
+
diff --git a/scripts/build.sh b/scripts/build.sh
new file mode 100644
index 00000000..2049a90c
--- /dev/null
+++ b/scripts/build.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Configures and builds in ./build; run from the directory that contains the top-level CMakeLists.txt.
+set -e  # stop at the first failing step so cmake/make never run in the wrong directory
+mkdir -p build
+cd build
+cmake ..
+make -j"$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
diff --git a/scripts/debug-build.sh b/scripts/debug-build.sh
new file mode 100644
index 00000000..a89e2a48
--- /dev/null
+++ b/scripts/debug-build.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Configures and builds a Debug configuration in ./debug-build; run from the directory that contains the top-level CMakeLists.txt.
+set -e  # stop at the first failing step so cmake/make never run in the wrong directory
+mkdir -p debug-build
+cd debug-build
+cmake -DCMAKE_BUILD_TYPE=Debug ..
+make -j"$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp
index 9e7158eb..e76797c5 100644
--- a/src/cpucounters.cpp
+++ b/src/cpucounters.cpp
@@ -6857,6 +6857,55 @@ bool PCM::useLinuxPerfForUncore() const
     return 1 == use;
 }
 
+// Enumerates the uncore PMU boxes of type BoxType on socket s that uncore PMU
+// discovery reports as accessed through PCI configuration space (PCICFG) and
+// calls f with an UncorePMU built for each of them. Boxes whose box control
+// register cannot be opened are skipped. Does nothing when no discovery data
+// is available.
+template <class F>
+void PCM::getPCICFGPMUsFromDiscovery(const unsigned int BoxType, const size_t s, F f) const
+{
+    if (!uncorePMUDiscovery.get()) return;
+    // Wraps a raw discovery address in a PCICFGRegister64; returns an empty pointer on failure.
+    auto makeRegister = [](const uint64 rawAddr)
+    {
+#ifndef PCI_ENABLE
+        constexpr auto PCI_ENABLE = 0x80000000ULL;
+#endif
+        UncorePMUDiscovery::PCICFGAddress Addr;
+        Addr.raw = rawAddr;
+        assert(Addr.raw & PCI_ENABLE);
+        try {
+            auto handle = std::make_shared<PciHandleType>(0, (uint32)Addr.fields.bus,
+                                                            (uint32)Addr.fields.device,
+                                                            (uint32)Addr.fields.function);
+            return std::make_shared<PCICFGRegister64>(handle, (size_t)Addr.fields.offset);
+        }
+        catch (...)
+        {
+            // best effort: any exception while opening the device or creating the register yields an empty register
+        }
+        return std::shared_ptr<PCICFGRegister64>();
+    };
+    const auto numBoxes = uncorePMUDiscovery->getNumBoxes(BoxType, s);
+    for (size_t pos = 0; pos < numBoxes; ++pos)
+    {
+        if (uncorePMUDiscovery->getBoxAccessType(BoxType, s, pos) != UncorePMUDiscovery::accessTypeEnum::PCICFG) continue;
+        const auto n_regs = uncorePMUDiscovery->getBoxNumRegs(BoxType, s, pos);
+        auto boxCtlRegister = makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, s, pos));
+        if (!boxCtlRegister.get()) continue;
+        // per-counter control and value registers of this box
+        std::vector<std::shared_ptr<HWRegister> > CounterControlRegs, CounterValueRegs;
+        for (size_t r = 0; r < n_regs; ++r)
+        {
+            // NOTE(review): an entry is empty if that counter register cannot be opened
+            CounterControlRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, s, pos, r)));
+            CounterValueRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtrAddr(BoxType, s, pos, r)));
+        }
+        f(UncorePMU(boxCtlRegister, CounterControlRegs, CounterValueRegs));
+    }
+}
+
 ServerUncorePMUs::ServerUncorePMUs(uint32 socket_, const PCM * pcm) :
      iMCbus(-1)
    , UPIbus(-1)
@@ -7293,9 +7342,12 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm)
     populateM2MPMUs(groupnr, M2Mbus, cpu_model, HBM_M2MRegisterLocation, hbm_m2mPMUs);
 
     int numChannels = 0;
-    if (cpu_model == PCM::SPR || cpu_model == PCM::EMR)
+    if (safe_getenv("PCM_NO_IMC_DISCOVERY") == std::string("1"))
     {
-        numChannels = 3;
+        if (cpu_model == PCM::SPR || cpu_model == PCM::EMR)
+        {
+            numChannels = 3;
+        }
     }
     if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX)
     {
@@ -7352,6 +7404,75 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm)
             }
         }
     }
+    else
+    {
+        switch (cpu_model)
+        {
+            case PCM::SPR:
+            case PCM::EMR:
+                {
+                    auto & uncorePMUDiscovery = pcm->uncorePMUDiscovery;
+                    const auto BoxType = SPR_IMC_BOX_TYPE;
+                    if (uncorePMUDiscovery.get())
+                    {
+                        const auto numBoxes = uncorePMUDiscovery->getNumBoxes(BoxType, socket_);
+                        for (size_t pos = 0; pos < numBoxes; ++pos)
+                        {
+                            if (uncorePMUDiscovery->getBoxAccessType(BoxType, socket_, pos) == UncorePMUDiscovery::accessTypeEnum::MMIO)
+                            {
+                                std::vector<std::shared_ptr<HWRegister> > CounterControlRegs, CounterValueRegs;
+                                const auto n_regs = uncorePMUDiscovery->getBoxNumRegs(BoxType, socket_, pos);
+                                auto makeRegister = [](const uint64 rawAddr, const uint32 bits) -> std::shared_ptr<HWRegister>
+                                {
+                                    const auto mapSize = SERVER_MC_CH_PMON_SIZE;
+                                    const auto alignedAddr = rawAddr & ~4095ULL;
+                                    const auto alignDelta = rawAddr & 4095ULL;
+                                    try {
+                                        auto handle = std::make_shared<MMIORange>(alignedAddr, mapSize, false);
+                                        assert(handle.get());
+                                        switch (bits)
+                                        {
+                                            case 32:
+                                                return std::make_shared<MMIORegister32>(handle, (size_t)alignDelta);
+                                            case 64:
+                                                return std::make_shared<MMIORegister64>(handle, (size_t)alignDelta);
+                                        }
+                                    }
+                                    catch (...)
+                                    {
+                                    }
+                                    return std::shared_ptr<HWRegister>();
+                                };
+
+                                auto boxCtlRegister = makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos), 32);
+                                if (boxCtlRegister.get())
+                                {
+                                    for (size_t r = 0; r < n_regs; ++r)
+                                    {
+                                        CounterControlRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos, r), 32));
+                                        CounterValueRegs.push_back(makeRegister(uncorePMUDiscovery->getBoxCtrAddr(BoxType, socket_, pos, r), 64));
+                                    }
+                                    imcPMUs.push_back(UncorePMU(boxCtlRegister,
+                                        CounterControlRegs,
+                                        CounterValueRegs,
+                                        makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos) + SERVER_MC_CH_PMON_FIXED_CTL_OFFSET, 32),
+                                        makeRegister(uncorePMUDiscovery->getBoxCtlAddr(BoxType, socket_, pos) + SERVER_MC_CH_PMON_FIXED_CTR_OFFSET, 64)));
+                                }
+                            }
+                        }
+                    }
+                    if (imcPMUs.empty() == false)
+                    {
+                        numChannels = 2;
+                        for (size_t c = 0; c < imcPMUs.size(); c += numChannels)
+                        {
+                            num_imc_channels.push_back(numChannels);
+                        }
+                    }
+                }
+                break;
+        }
+    }
 
     if (imcPMUs.empty())
     {
@@ -7500,11 +7621,28 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm)
     return;
 #endif
 
+    if (pcm->getNumSockets() <= 4 && safe_getenv("PCM_NO_UPILL_DISCOVERY") != std::string("1"))
+    {
+        switch (cpu_model)
+        {
+            case PCM::SPR:
+            case PCM::EMR:
+                {
+                    std::cerr << "INFO: Trying to detect UPILL PMU through uncore PMU discovery..\n";
+                    pcm->getPCICFGPMUsFromDiscovery(SPR_UPILL_BOX_TYPE, socket_, [this](const UncorePMU & pmu)
+                    {
+                        xpiPMUs.push_back(pmu);
+                    });
+                }
+                break;
+        }
+    }
+
     std::vector<std::shared_ptr<PciHandleType> > qpiLLHandles;
     auto xPI = pcm->xPI();
     try
     {
-        for (size_t i = 0; i < XPIRegisterLocation.size(); ++i)
+        if (xpiPMUs.empty()) for (size_t i = 0; i < XPIRegisterLocation.size(); ++i)
         {
             PciHandleType * handle = createIntelPerfMonDevice(groupnr, UPIbus, XPIRegisterLocation[i].first, XPIRegisterLocation[i].second, true);
             if (handle)
@@ -7530,7 +7668,7 @@ void ServerUncorePMUs::initDirect(uint32 socket_, const PCM * pcm)
         throw std::exception();
     }
 
-    for (auto & handle : qpiLLHandles)
+    if (xpiPMUs.empty()) for (auto & handle : qpiLLHandles)
     {
         if (cpu_model == PCM::SKX)
         {
diff --git a/src/cpucounters.h b/src/cpucounters.h
index 31286338..db5bbaa3 100644
--- a/src/cpucounters.h
+++ b/src/cpucounters.h
@@ -804,6 +804,9 @@ class PCM_API PCM
 
     std::shared_ptr<UncorePMUDiscovery> uncorePMUDiscovery;
 
+    template <class F>
+    void getPCICFGPMUsFromDiscovery(const unsigned int BoxType, const size_t s, F f) const;
+
     bool disable_JKT_workaround;
     bool blocked;              // track if time-driven counter update is running or not: PCM is blocked
 
diff --git a/src/memoptest.cpp b/src/memoptest.cpp
index 04d36140..dda78042 100644
--- a/src/memoptest.cpp
+++ b/src/memoptest.cpp
@@ -79,7 +79,7 @@ int main(int argc, char * argv[])
     assert((argc > 1) && "Need operation type as parameter: 0 - read, 1 - write, 2 - streaming write ");
     int op = atoi(argv[1]);
     T * vector;
-    int nelements = 13000000;
+    int nelements = 1024 * 1024 * 1024 / sizeof(T);
     vector = new T[nelements];
 
     int i = 0;
diff --git a/src/uncore_pmu_discovery.h b/src/uncore_pmu_discovery.h
index dc050fa2..284324df 100644
--- a/src/uncore_pmu_discovery.h
+++ b/src/uncore_pmu_discovery.h
@@ -11,6 +11,8 @@
 namespace pcm {
 
 constexpr auto SPR_PCU_BOX_TYPE = 4U;
+constexpr auto SPR_IMC_BOX_TYPE = 6U;
+constexpr auto SPR_UPILL_BOX_TYPE = 8U;
 constexpr auto SPR_MDF_BOX_TYPE = 11U;
 constexpr auto SPR_CXLCM_BOX_TYPE = 12U;
 constexpr auto SPR_CXLDP_BOX_TYPE = 13U;
@@ -185,7 +187,8 @@ class UncorePMUDiscovery
     {
         if (validBox(boxType, socket, pos) && c < boxPMUs[socket][boxType][pos].numRegs)
         {
-            return boxPMUs[socket][boxType][pos].boxCtrlAddr + boxPMUs[socket][boxType][pos].ctrlOffset + c * registerStep(boxType, socket, pos);
+            const size_t step = (boxType == SPR_IMC_BOX_TYPE) ? 4 : registerStep(boxType, socket, pos);
+            return boxPMUs[socket][boxType][pos].boxCtrlAddr + boxPMUs[socket][boxType][pos].ctrlOffset + c * step;
         }
         return 0;
     }