Skip to content

Commit

Permalink
charts,salt: Reinstate node-level cAdvisor metrics
Browse files Browse the repository at this point in the history
This overrides the default kube-prometheus-stack configuration, since
we still use these node-level cAdvisor metrics in prometheus-adapter.
Ideally, prometheus-adapter would consume node-exporter metrics instead,
but this requires #4018 to be fixed first.
  • Loading branch information
gdemonet committed Mar 9, 2023
1 parent 161c389 commit b9d1e26
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
32 changes: 31 additions & 1 deletion charts/kube-prometheus-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ prometheus-node-exporter:
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+|var/lib/containerd/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$


kubeEtcd:
service:
port: 2381
Expand All @@ -294,3 +293,34 @@ kubeControllerManager:
# Kubelet ServiceMonitor settings for the kube-prometheus-stack chart.
kubelet:
serviceMonitor:
# NOTE(review): `__var__(...)` looks like a placeholder that is substituted
# outside this file - confirm where it is resolved.
scrapeTimeout: "__var__(prometheus.spec.config.serviceMonitor.kubelet.scrapeTimeout)"

# FIXME: keep some node-level cAdvisor metrics around as a workaround until
# #4018 is fixed.
# The relabelings below are the same as the chart defaults, except that the
# problematic "drop cgroup metrics with no pod" rule is commented out.
# Each active rule drops the series whose metric name (`__name__`) matches
# its `regex`.
cAdvisorMetricRelabelings:
# Drop less useful container CPU metrics.
- sourceLabels: [__name__]
action: drop
regex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)'
# Drop less useful container / always zero filesystem metrics.
- sourceLabels: [__name__]
action: drop
regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)'
# Drop less useful / always zero container memory metrics.
- sourceLabels: [__name__]
action: drop
regex: 'container_memory_(mapped_file|swap)'
# Drop less useful container process metrics.
- sourceLabels: [__name__]
action: drop
regex: 'container_(file_descriptors|tasks_state|threads_max)'
# Drop container spec metrics that overlap with kube-state-metrics.
- sourceLabels: [__name__]
action: drop
regex: 'container_spec.*'
# WORKAROUND - do not enable this rule yet, otherwise prometheus-adapter
# cannot expose node metrics.
# The rule drops cgroup metrics with no pod: with the default `;` separator,
# '.+;' matches a non-empty `id` label followed by an empty `pod` label.
# - sourceLabels: [id, pod]
# action: drop
# regex: '.+;'
# END WORKAROUND
5 changes: 0 additions & 5 deletions salt/metalk8s/addons/prometheus-operator/deployed/chart.sls
Original file line number Diff line number Diff line change
Expand Up @@ -71434,11 +71434,6 @@ spec:
regex: container_spec.*
sourceLabels:
- __name__
- action: drop
regex: .+;
sourceLabels:
- id
- pod
path: /metrics/cadvisor
port: https-metrics
relabelings:
Expand Down

0 comments on commit b9d1e26

Please sign in to comment.