Skip to content

Commit

Permalink
Add NodeSystemSaturation and NodeMemoryMajorPagesFaults
Browse files Browse the repository at this point in the history
Signed-off-by: Vitaly Zhuravlev <v-zhuravlev@users.noreply.github.com>
  • Loading branch information
v-zhuravlev committed Mar 27, 2023
1 parent a3ba905 commit 4a83ed2
Showing 1 changed file with 36 additions and 1 deletion.
37 changes: 36 additions & 1 deletion docs/node-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,41 @@
description: 'CPU usage at {{ $labels.instance }} has been above 80% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.',
},
},
{
  // Alert on sustained high system load relative to CPU count.
  // The 1-minute load average is divided by the number of idle-mode CPU
  // series (aggregated away from the `cpu` and `mode` labels), and the rule
  // fires when that ratio stays above 2 for 15 minutes.
  alert: 'NodeSystemSaturation',
  annotations: {
    summary: 'System saturated, load per core is very high.',
    // This text block is NOT passed through a format operator, so the
    // `%.2f` below reaches the alert template unescaped.
    description: |||
      System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
      This might indicate this instance resources saturation and can cause it becoming unresponsive.
    |||,
  },
  // `%(nodeExporterSelector)s` is filled in from the mixin configuration.
  expr: std.format(|||
    node_load1{%(nodeExporterSelector)s}
    / count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}) > 2
  |||, $._config),
  'for': '15m',
  labels: { severity: 'critical' },
},
{
  // Alert on a sustained high rate of major page faults.
  // Fires when the per-second rate of `node_vmstat_pgmajfault`, computed over
  // a 5-minute window, stays above 500 for 15 minutes.
  alert: 'NodeMemoryMajorPagesFaults',
  // `%(nodeExporterSelector)s` is filled in from the mixin configuration.
  expr: |||
    rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > 500
  ||| % $._config,
  'for': '15m',
  labels: {
    severity: 'warning',
  },
  annotations: {
    summary: 'Memory major page faults are occurring at very high rate.',
    // This text block is NOT passed through a format operator, so the
    // `%.2f` below reaches the alert template unescaped.
    // Wording names the measured quantity (page faults) and states the
    // threshold as a lower bound, matching the `> 500` in `expr`.
    description: |||
      Memory major page faults are occurring at very high rate at {{ $labels.instance }}, above 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
      Please check that there is enough memory available at this instance.
    |||,
  },
},
{
alert: 'NodeMemoryHighUtilization',
expr: |||
Expand Down Expand Up @@ -352,7 +387,7 @@
summary: 'Disk IO queue is high.',
description: |||
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
This symptom might indicate disk saturation.,
This symptom might indicate disk saturation.
|||,
},
},
Expand Down

0 comments on commit 4a83ed2

Please sign in to comment.