From 263390cf5ba7ae12f7232b753090ef43e81a9f12 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Fri, 26 Apr 2024 16:34:01 +0200
Subject: [PATCH] update README and values comments

---
 deployment/pcm/README.md         | 115 +++++++++++++++----------------
 deployment/pcm/values-metal.yaml |   4 +-
 deployment/pcm/values-vm.yaml    |   5 +-
 deployment/pcm/values.yaml       |  61 ++++++++--------
 4 files changed, 92 insertions(+), 93 deletions(-)

diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
index 3102695e..a69a19fa 100644
--- a/deployment/pcm/README.md
+++ b/deployment/pcm/README.md
@@ -59,9 +59,8 @@ kubectl logs ds/pcm
 
 ### Requirements
 
-- Full set of metrics requires bare-metal or .metal instance (uncore metrics, RDT, energy, UPI),
-- Core metrics (instructions, cycles are also available) on VM instances,
-- /sys/fs/resctrl has to be mounted on host OS,
+- Full set of metrics (uncore/UPI, RDT, energy) requires bare-metal or .metal cloud instance.
+- /sys/fs/resctrl has to be mounted on host OS (for default indirect deployment method),
 - pod is allowed to be run with privileged capabilities (SYS_ADMIN, SYS_RAWIO) on given namespace in other words: Pod Security Standards allow to run on privileged level,
 
 ```
@@ -77,51 +76,19 @@ More information here: https://kubernetes.io/docs/tutorials/security/ns-level-ps
 
 ### Defaults
 
-- Use Linux abstraction to access event counters (Linux Perf, resctrl) and run container in un-privileged mode.
-- hostPort 9738 is exposed on host, (TODO: security review)
-- Prometheus podMonitor is disabled
-
-#### Metric availability and requirements (devices/mounts/permissions)
-
-| Method        | Used interfaces                                            | default  | Notes                                                                                 |
-|---------------|------------------------------------------------------------| -------- | ------------------------------------------------------------------------------------- |
-| indirect      | perf, resctrl                                              |    v     | missing energy metrics,                                                               |
-| direct        | msr                                                        |          | requires msr module and access to /dev/cpu (non trivial) or privileged access         |
-
-
-| Metrics               | Available on Hardware         | Available through interface  | Available through method |
-| --------------------- | ----------------------------- | ---------------------------- | ------------------------ |
-| core                  | bare-metal, VM (any)          | msr or perf                  | any                      |
-| uncore (UPI)          | bare-metal, VM (all sockets)  | msr or perf                  | any                      |
-| RDT (MBW,L3OCCUP)     | bare-metal, VM (all sockets)  | msr or resctrl               | any                      |
-| energy, temp          | bare-metal (only)             | msr                          | direct                   |
-| perf-topdown          |                               | perf only                    | indirect                 |
-
-
-| Interface     | Requirements                                               |  Controlled by (env/helm value) |  default helm         | Used by source code                                      | Notes                                               |
-|---------------|------------------------------------------------------------|---------------------------------|-----------------------|----------------------------------------------------------|-----------------------------------------------------|
-| perf          | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_NO_PERF                     | use perf              | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
-| perf-uncore   | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_USE_UNCORE_PERF             | use perf for uncore   | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
-| perf-topdown  | /sys/bus/event_source/devices/cpu/events                   | sysMount                        | yes                   | cpucounters.cpp:perfSupportsTopDown()                    | TODO: conflicts with sys/fs/resctrl                 |
-| RDT           | uses "msr" or "resctrl" interface                          | PCM_NO_RDT                      | yes                   | cpucounters.cpp:isRDTDisabled()/QOSMetricAvailable()     |                                                     |
-| resctrl       | RW: /sys/fs/resctrl                                        | PCM_USE_RESCTRL                 | yes                   | resctrl.cpp                                              | resctrlHostMount                                    |
-| watchdog      | RO/RW: /proc/sys/kernel/nmi_watchdog                       | PCM_KEEP_NMI_WATCHDOG           | yes (tries to disable)| src/cpucounters.cpp:disableNMIWatchdog()                 |                                                     |
-| msr           | RW: /dev/cpu/X/msr + privileged or CAP_ADMIN/CAP_RAWIO     | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | privileged or some method to access /dev/cpu        |
-|               | RW: /dev/mem                                               | ?                               | msr is disabled       | cpucounters.cpp:initUncoreObjects, pci.cpp:PCIHandleM()  | privileged or some method to access /dev/cpu        |
-|               | RO/RW: /sys/module/msr/parameters                          | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | sysMount                                            |
-|               | RW: /proc/bus/pci                                          | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PCIHandle()                                      | pciMount                                            |
-|               | RO: /sys/firmware/acpi/tables/MCFG                         | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PciHandle::openMcfgTable()                       | mcfgMount                                           |
-|               | energy                                                     |                                 |                       | cpucounters.cpp initEnergyMonitoring()                   |                                                     |
+- Indirect method uses Linux abstraction to access event counters (Linux Perf, resctrl) and run container in non-privileged mode.
+- hostPort 9738 is exposed on host. (TODO: security review, consider TLS, together with Prometheus scraping!).
+- Prometheus podMonitor is disabled (enable it with --set podMonitor=true).
 
 ### Validation on local kind cluster
 
-
 #### Requirements
 
-- kubectl/kind/helm/jq binaries available in PATH
-- docker service up and running
+- kubectl/kind/helm/jq binaries available in PATH,
+- docker service up and running.
+- full set of metrics is available only on a bare-metal instance or a cloud .metal instance.
 
-#### 1) Optionally mount resctrl filesystem
+#### 1) (Optionally) mount resctrl filesystem (for RDT metrics)
 
 ```
 mount -t resctrl resctrl /sys/fs/resctrl
@@ -129,13 +96,11 @@ mount -t resctrl resctrl /sys/fs/resctrl
 
 #### 2) Create kind based Kubernetes cluster
 
-
 ```
 kind create cluster
 ```
 
-**Note** to be able to collect and test resctrl RDT metrics, kind cluster have to be created with additional mounts:
-
+**Note** to be able to collect and test RDT metrics through the resctrl filesystem, the kind cluster has to be created with additional mounts:
 ```
 nodes:
 - role: control-plane
@@ -143,8 +108,8 @@ nodes:
   - hostPath: /sys/fs/resctrl
     containerPath: /sys/fs/resctrl
 ```
-or (optionally), create kind cluster with local registry with [this script](https://kind.sigs.k8s.io/docs/user/local-registry/).
-and apply the patch using sed:
+e.g. create kind cluster with local registry with [this script](https://kind.sigs.k8s.io/docs/user/local-registry/).
+and apply the patch to enable resctrl in the following way:
 
 ```
 wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh
@@ -156,7 +121,10 @@ nodes:\
   - hostPath: /sys/fs/resctrl\
     containerPath: /sys/fs/resctrl\
 ' kind-with-registry.sh
+```
 
+Then create cluster using above patched script:
+```
 bash kind-with-registry.sh
 ```
 
@@ -170,8 +138,7 @@ Export kind kubeconfig as default for further kubectl commands:
 kind export kubeconfig
 ```
 
-
-#### 3) (Optionally) Deploy Node feature discovery
+#### 3) (Optionally) Deploy Node Feature Discovery (nfd)
 
 ```
 # I.a. Using Kustomize:
@@ -196,27 +163,23 @@ kubectl get sts prometheus-prometheus-kube-prometheus-prometheus
 
 #### 5) Deploy PCM helm chart
 
-Deploy with defaults:
 ```
-# Deploy to current namespace with defaults
+# a) Deploy to current namespace with defaults
 helm install pcm . 
 
-# Alternatively deploy with NFD and with Prometheus enabled
+# b) Alternatively deploy with NFD and/or with Prometheus enabled
 helm install pcm . --set podMonitor=true
-kubectl get podmonitor pcm
 helm install pcm . --set nfd=true
 
-# Alternatively deploy with NFD and with Prometheus enabled into own "pcm" namespace 
+# c) Alternatively deploy with defaults into own "pcm" namespace
 helm install pcm . --namespace pcm 
 ```
 
-#### 6) Check metrics
+#### 6) Check metrics are exported
 
 Run proxy in background:
 ```
 kubectl proxy &
-# for access from another host TODO to be remove (unsecure!!!)
-kubectl proxy --address 0.0.0.0 &
 ```
 
 Access PCM metrics directly:
@@ -232,7 +195,7 @@ curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/met
 curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep DRAM_Joules_Consumed                                                    # source: energy
 ```
 
-or through Prometheus UI/prom tool:
+or through Prometheus UI/prom tool (requires prometheus operator to be deployed and helm install with `--set podMonitor=true`):
 ```
 http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/graph
 promtool query range --step 1m http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'rate(DRAM_Writes{aggregate="system"}[5m])/1e9'
@@ -265,7 +228,7 @@ helm install pcm-vm . -f values-vm.yaml
 helm install pcm-metal . -f values-metal.yaml
 ```
 
-#### Direct as non-privileged container 
+#### Direct method as non-privileged container (not recommended)
 
 **Note** PCM requires access to /dev/cpu device in read writer mode (MSR access) but it is no possible currently to mount devices in Kubernetes pods/containers in vanila Kubernetes. Please read this isses for more information https://github.com/kubernetes/kubernetes/issues/5607.
 
@@ -350,7 +313,39 @@ docker push localhost:5001/pcm-local
 helm install pcm . -f values-local-image.yaml
 ```
 
-##### Troubleshooting
+#### Troubleshooting
+
+##### Metric availability and requirements (devices/mounts/permissions)
+
+| Method        | Used interfaces                                            | default  | Notes                                                                                 |
+|---------------|------------------------------------------------------------| -------- | ------------------------------------------------------------------------------------- |
+| indirect      | perf, resctrl                                              |    v     | missing energy metrics,                                                               |
+| direct        | msr                                                        |          | requires msr module and access to /dev/cpu (non trivial) or privileged access         |
+
+
+| Metrics               | Available on Hardware         | Available through interface  | Available through method |
+| --------------------- | ----------------------------- | ---------------------------- | ------------------------ |
+| core                  | bare-metal, VM (any)          | msr or perf                  | any                      |
+| uncore (UPI)          | bare-metal, VM (all sockets)  | msr or perf                  | any                      |
+| RDT (MBW,L3OCCUP)     | bare-metal, VM (all sockets)  | msr or resctrl               | any                      |
+| energy, temp          | bare-metal (only)             | msr                          | direct                   |
+| perf-topdown          |                               | perf only                    | indirect                 |
+
+
+| Interface     | Requirements                                               |  Controlled by (env/helm value) |  default helm         | Used by source code                                      | Notes                                               |
+|---------------|------------------------------------------------------------|---------------------------------|-----------------------|----------------------------------------------------------|-----------------------------------------------------|
+| perf          | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_NO_PERF                     | use perf              | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
+| perf-uncore   | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_USE_UNCORE_PERF             | use perf for uncore   | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
+| perf-topdown  | /sys/bus/event_source/devices/cpu/events                   | sysMount                        | yes                   | cpucounters.cpp:perfSupportsTopDown()                    | TODO: conflicts with sys/fs/resctrl                 |
+| RDT           | uses "msr" or "resctrl" interface                          | PCM_NO_RDT                      | yes                   | cpucounters.cpp:isRDTDisabled()/QOSMetricAvailable()     |                                                     |
+| resctrl       | RW: /sys/fs/resctrl                                        | PCM_USE_RESCTRL                 | yes                   | resctrl.cpp                                              | resctrlHostMount                                    |
+| watchdog      | RO/RW: /proc/sys/kernel/nmi_watchdog                       | PCM_KEEP_NMI_WATCHDOG           | yes (tries to disable)| src/cpucounters.cpp:disableNMIWatchdog()                 |                                                     |
+| msr           | RW: /dev/cpu/X/msr + privileged or CAP_ADMIN/CAP_RAWIO     | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | privileged or some method to access /dev/cpu        |
+|               | RW: /dev/mem                                               | ?                               | msr is disabled       | cpucounters.cpp:initUncoreObjects, pci.cpp:PCIHandleM()  | privileged or some method to access /dev/cpu        |
+|               | RO/RW: /sys/module/msr/parameters                          | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | sysMount                                            |
+|               | RW: /proc/bus/pci                                          | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PCIHandle()                                      | pciMount                                            |
+|               | RO: /sys/firmware/acpi/tables/MCFG                         | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PciHandle::openMcfgTable()                       | mcfgMount                                           |
+|               | energy                                                     |                                 |                       | cpucounters.cpp initEnergyMonitoring()                   |                                                     |
 
 One can replace pcm-sensor-server command and run pcm or sleep to investigate issue add following arguments when install helm chart
 ```
diff --git a/deployment/pcm/values-metal.yaml b/deployment/pcm/values-metal.yaml
index daf01fc6..1ca73c1e 100644
--- a/deployment/pcm/values-metal.yaml
+++ b/deployment/pcm/values-metal.yaml
@@ -1,6 +1,8 @@
-#### ================ Tunning for VM ================
+#### ================ Tuning for bare-metal instances ================
+# with node-feature-discovery node affinity for non hypervisor and RDT
 nmiWatchdogMount: false
 PCM_NO_AWS_WORKAROUND: 1
 PCM_KEEP_NMI_WATCHDOG: 0
 nfd: true  
 nfdBaremetalAffinity: true
+nfdRDTAffinity: true
diff --git a/deployment/pcm/values-vm.yaml b/deployment/pcm/values-vm.yaml
index 71badb31..0c6d4139 100644
--- a/deployment/pcm/values-vm.yaml
+++ b/deployment/pcm/values-vm.yaml
@@ -1,4 +1,5 @@
 #### ================ Tunning for VM ================
 nmiWatchdogMount: true
-mcfgMount: false           
-PCM_NO_RDT: 1                 # 0 - try to collect RDT data, enables local/remote memory bandwidth + llc occupancy
+# Disable RDT because it is not available on VM instances
+PCM_NO_RDT: 1                 
+resctrlHostMount: false
diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml
index c25af6ba..1c0f9f29 100644
--- a/deployment/pcm/values.yaml
+++ b/deployment/pcm/values.yaml
@@ -1,4 +1,4 @@
-### -------------- Naming -----------
+### -------------- Naming -------------------
 # used in 
 # - common label: app.kubernetes.io/name otherwise "Chart name"
 # - also in selectorLabels together with release.name
@@ -7,33 +7,28 @@ nameOverride: ""
 # Used as daemonset name (usually based on truncated "name + release name")
 fullnameOverride: ""
 
-### -------------- Image options -----------
+### -------------- Image options ------------
 image:
   repository: ghcr.io/opcm/pcm
   pullPolicy: IfNotPresent
   tag: "latest"               # uses .Chart.AppVersion if empty
 imagePullSecrets: {}
 
-### -------------- Security ----------------
-# Configures SecurityContext to not be privileged (by default) so  SYS_ADMIN/SYS_RAWIO capabilietes are required for running pod
+### -------------- Security ------------------
+# Configures SecurityContext as non-privileged (by default), so SYS_ADMIN/SYS_RAWIO capabilities are required for running the pod
 privileged: false
 
-# PCM deployment to be intergrated with NRI balloons resource policy intergration
-# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules. 
-nriBalloonsPolicyIntegration: false
-
-### -------------- Prometheus operator integration --------------------
-# Expose run containerPort "pcm-sensor-server -p 9738" as hostPort, can be empty to disable hostPort
-hostPort: 9738
-# Deploy PromtheusOperator PodMonitor
-podMonitor: false
+### -------------- Required OS affinity -------
+# Should only run on Linux nodes
+nodeSelector:
+  kubernetes.io/os: linux
 
 ### -------------- Probes ---------------------
 probes: false
 
-#### ================ Tune this section to handle VM or limited set of metrics 
+### ================ Metrics configuration ======================
 
-### -------------- Metrics: Uncore -------------------
+### -------------- Metrics: Uncore ------------
 # required for uncore metrics, only in baremetal, not available for VM 
 mcfgMount: false           
 sysMount: false
@@ -45,7 +40,7 @@ PCM_NO_MSR: 1                 # do not use MSR
 PCM_NO_PERF: 0                # use Linux Perf over MSR for core metrics
 PCM_USE_UNCORE_PERF: 1        # use Linux Perf instead of MSR for uncore metrics (collection+detection)
 
-### -------------- Metrics: RDT ----------------------
+### -------------- Metrics: RDT ---------------
 ### RDT rdt/resctrl:
 PCM_NO_RDT: 0                 # 0 - try to collect RDT data, enables local/remote memory bandwidth + llc occupancy
 PCM_USE_RESCTRL: 1            # use Linux Perf  instead of MSR access (more reliable)
@@ -54,10 +49,8 @@ PCM_USE_RESCTRL: 1            # use Linux Perf  instead of MSR access (more reli
 resctrlHostMount: true        # mount from external host
 resctrlInsideMount: false     # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require 
 
-### -------------- HW: vm/baremetal/aws/nmi
+### -------------- Other (NMI handling and/or on VM/AWS)
 PCM_IGNORE_ARCH_PERFMON: 0    # After VM is detected through CPUID (hypervisor flag) - check arch_perfmon flag to be also enabled - fail if not avaiable (0 - do check, 1 - disable check)
-
-### -------------- Other (NMI handling and/or on AWS)
 # 0: Disabling NMI watchdog since it consumes one hw-PMU counter, requires nmiWatchdogMount to be true
 # 1: don't disable NMI watchdog (reducing the core metrics set) - prefferd for production usage!
 # but even with 0 automatic AWS workround applies! 
@@ -81,24 +74,32 @@ cpuLimit: 100m
 cpuRequest: 100m
 memoryLimit: 512Mi
 memoryRequest: 256Mi
-extraResources: {} # requests + limits
+# requests, limits level need to be specified here
+extraResources: {} 
 
-### =============================== NodeSelector and node-feature-discovery integration =================
-### Enables integration with node-feautre-discovery
-# So configuration will be based on discovery made by NFD
+### =============================== Integrations with other projects ====================================
+#
+### -------------- Prometheus operator --------------------
+# Expose run containerPort "pcm-sensor-server -p 9738" as hostPort, can be empty to disable hostPort
+hostPort: 9738
+# Deploy Prometheus Operator PodMonitor (requires hostPort to be non-empty)
+podMonitor: false
+
+### -------------- NRI balloons policy plugin -------------
+# PCM deployment to be integrated with the NRI balloons resource policy:
+# if true, will add special annotation to allow pcm pod to use all the cores, regardless of NRI balloons policy rules.
+nriBalloonsPolicyIntegration: false
+
+### -------------  node-feature-discovery -----------------
 # when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) 
 nfd: false  
-# if enabled daemonset nodeAffinity would required node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag
+# if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true)
 nfdBaremetalAffinity: false
-# if enabled
+# if enabled, the following RDT labels will be required for scheduling (requires nfd=true)
 # feature.node.kubernetes.io/cpu-rdt.RDTCMT=true
 # feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true
 # feature.node.kubernetes.io/cpu-rdt.RDTMBA=true
 # feature.node.kubernetes.io/cpu-rdt.RDTMBM=true
 # feature.node.kubernetes.io/cpu-rdt.RDTMON=true
-# * use nodeSelect to only match flags avaiable on metal e.g. 
-# feature.node.kubernetes.io/cpu-cpuid.VMX: "true"            # only appears on metal instances in AWS (not reliable)
+nfdRDTAffinity: false
 
-# Should only running on linux and baremetal 
-nodeSelector:
-  kubernetes.io/os: linux