diff --git a/.src/jsonnetfile.json b/.src/jsonnetfile.json index 148f54c..22d7d00 100644 --- a/.src/jsonnetfile.json +++ b/.src/jsonnetfile.json @@ -19,6 +19,15 @@ }, "version": "main" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "opensearch-mixin" + } + }, + "version": "master" + }, { "source": { "git": { diff --git a/.src/jsonnetfile.lock.json b/.src/jsonnetfile.lock.json index 458fe6d..44c9d09 100644 --- a/.src/jsonnetfile.lock.json +++ b/.src/jsonnetfile.lock.json @@ -41,6 +41,26 @@ "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864", "sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v10.0.0" + } + }, + "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864", + "sum": "xdcrJPJlpkq4+5LpGwN4tPAuheNNLXZjE6tDcyvFjr0=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.0.0" + } + }, + "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864", + "sum": "0BvzR0i4bS4hc2O3xDv6i9m52z7mPrjvqxtcPrGhynA=" + }, { "source": { "git": { @@ -51,6 +71,16 @@ "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864", "sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "a8fc2139d881ae632a8c956eb9dd4b84b24f362e", + "sum": "c2Omoqo8FTwR/V3VC+hRN5CEyI0UDD1OyYXnEqwiKLY=" + }, { "source": { "git": { @@ -61,6 +91,16 @@ "version": "a8fc2139d881ae632a8c956eb9dd4b84b24f362e", "sum": "yxqWcq/N3E/a/XreeU6EuE6X7kYPnG0AspAQFKOjASo=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "opensearch-mixin" + } + }, + "version": "a8fc2139d881ae632a8c956eb9dd4b84b24f362e", + "sum": "AK83KBy5roMxhT0taG54ERV20oG9mhaCJA+EHRzuPO4=" + }, { "source": { "git": { @@ -110,6 +150,16 @@ }, 
"version": "d3074b39c38493ebb81514c0ec962b7853ed0162", "sum": "dYLcLzGH4yF3qB7OGC/7z4nqeTNjv42L7Q3BENU8XJI=" + }, + { + "source": { + "git": { + "remote": "https://github.com/yugui/jsonnetunit.git", + "subdir": "jsonnetunit" + } + }, + "version": "6927c58cae7624a00f368b977ccc477d4f74071f", + "sum": "9FFqqln65hooRF0l6rjICDtnTxUlmDj34+sKMh4sjPI=" } ], "legacyImports": false diff --git a/.src/mixins/infrastructure/opensearch.libsonnet b/.src/mixins/infrastructure/opensearch.libsonnet new file mode 100644 index 0000000..9f9ba68 --- /dev/null +++ b/.src/mixins/infrastructure/opensearch.libsonnet @@ -0,0 +1,10 @@ +local opensearch = import "opensearch-mixin/mixin.libsonnet"; + +opensearch { + _config+:: { + enableLokiLogs: false, + }, + prometheusRules+: {}, + prometheusAlerts+: {}, + grafanaDashboards+: {} +} diff --git a/grafana/dashboards/infrastructure/node-overview.json b/grafana/dashboards/infrastructure/node-overview.json new file mode 100644 index 0000000..cebcac7 --- /dev/null +++ b/grafana/dashboards/infrastructure/node-overview.json @@ -0,0 +1,1661 @@ +{ + "links": [ + { + "asDropdown": false, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch-mixin" + ], + "title": "Other Opensearch dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + 
"maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1\n) * 2\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1\n) * 3\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1\n) * 4\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", + "legendFormat": "{{node}}" + } + ], + "title": "Roles timeline", + "type": "status-history" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 
24, + "x": 0, + "y": 1 + }, + "id": 2, + "targets": [ ], + "title": "Node health", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "CPU usage percentage of the node's Operating System.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 2 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "legendFormat": "{{node}}" + } + ], + "title": "Node CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Memory usage percentage of the node for the Operating System and OpenSearch", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 2 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": 
"opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "legendFormat": "{{node}}" + } + ], + "title": "Node memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Node file system read and write data.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 1, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": "normal" + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/time|used|busy|util/" + }, + "properties": [ + { + "id": "custom.axisSoftMax", + "value": 100 + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "unit", + "value": "percent" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 2 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "legendFormat": "{{node}} - read" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "legendFormat": "{{node}} - write" + } + ], + "title": "Node I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Number of open 
connections for the selected node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": "normal" + }, + "unit": "" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 2 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "legendFormat": "{{node}}" + } + ], + "title": "Node open connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Disk usage percentage of the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 1, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", + "legendFormat": 
"{{node}}" + } + ], + "title": "Node disk usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 * opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", + "legendFormat": "{{node}}" + } + ], + "title": "Node memory swap", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "description": "Node network traffic sent and received.", + "fieldConfig": { + "defaults": { + "custom": { + "axisCenteredZero": false, + "axisLabel": "out(-) | in(+)", + "fillOpacity": 5, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/sent/" + }, + "properties": [ + { + "id": "custom.transform", + 
"value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "legendFormat": "{{node}} - sent" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "legendFormat": "{{node}} - received" + } + ], + "title": "Node network traffic", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Circuit breakers tripped on the selected node by type", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "trips" + }, + "overrides": [ ] + }, + 
"gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{ name }}" + } + ], + "title": "Circuit breakers", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 11, + "targets": [ ], + "title": "Node JVM", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The amount of heap memory used vs committed on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 12, + "options": { + "legend": 
{ + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - used" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - commited" + } + ], + "title": "JVM heap used vs. committed", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The amount of non-heap memory used vs committed on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 13, + "options": { + "legend": { + "calcs": 
[ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - used" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - commited" + } + ], + "title": "JVM non-heap used vs. committed", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of threads running in the JVM on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "threads" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 14, + 
"options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "JVM threads", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of buffer pools available on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "buffer pools" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by( job,opensearch_cluster,node, bufferpool) 
(opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{bufferpool}}" + } + ], + "title": "JVM buffer pools", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The uptime of the JVM in seconds on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 22 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job,opensearch_cluster,node) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "JVM uptime", + "type": "timeseries" + }, + { + "datasource": { + "uid": 
"${prometheus_datasource}" + }, + "description": "The number of garbage collection operations on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "operations" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 22 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "JVM garbage collections", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The amount of time spent on garbage collection on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 22 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "JVM garbage collection time", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The percent used of JVM buffer pool memory.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 22 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "100 * (sum by (job,opensearch_cluster,node, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{bufferpool}}" + } + ], + "title": "JVM buffer pool usage", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 20, + "targets": [ ], + "title": "Thread pools", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of threads in the thread pool for the selected node", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "threads" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job,opensearch_cluster,node) ((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Thread pool threads", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of tasks in the thread pool for the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "tasks" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Thread pool tasks", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 36, + "tags": [ + "opensearch-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": 
"label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "OpenSearch node overview", + "uid": "opensearch-node-overview" +} diff --git a/grafana/dashboards/infrastructure/opensearch-cluster-overview.json b/grafana/dashboards/infrastructure/opensearch-cluster-overview.json new file mode 100644 index 0000000..27af57f --- /dev/null +++ b/grafana/dashboards/infrastructure/opensearch-cluster-overview.json @@ -0,0 +1,2089 @@ +{ + "links": [ + { + "asDropdown": false, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch-mixin" + ], + "title": "Other Opensearch dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": { + "color": "super-light-orange", + "index": 5, + "text": "False" + }, + "1": { + "color": "light-green", + "index": 3, + "text": "True" + }, + "Data": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "Ingest": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "Master": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "Remote cluster client": { + "color": "light-orange", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + 
}, + "expr": "max by (job,opensearch_cluster,node,node,nodeid,role,primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[1d]))", + "instant": true, + "legendFormat": "{{node}}" + } + ], + "title": "Roles", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "role" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "cluster_manager": 108, + "data": 105, + "ingest": 106, + "job": 3, + "master": 104, + "node": 3, + "nodeid": 3, + "opensearch_cluster": 3, + "remote_cluster_client": 107 + }, + "renameByName": { + "Time": "", + "cluster": "Cluster", + "cluster_manager": "Cluster manager", + "data": "Data", + "ingest": "Ingest", + "master": "Master", + "node": "Node", + "nodeid": "Nodeid", + "remote_cluster_client": "Remote cluster client" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The overall health and availability of the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "Green" + }, + "1": { + "index": 1, + "text": "Yellow" + }, + "2": { + "index": 2, + "text": "Red" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 2 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto" + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "min by(job,opensearch_cluster) (opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Cluster status", + "type": "stat" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of running nodes across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 2 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "min by(job,opensearch_cluster) (opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Node count", + "type": "stat" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of data nodes in the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { 
+ "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 2 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "min by(job,opensearch_cluster) (opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Data node count", + "type": "stat" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of shards in the OpenSearch cluster across all indices.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 2 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum(max by (type) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Shard count", + "type": "stat" + }, + { + 
"datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Percent of active shards across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 2 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "min by(job,opensearch_cluster) (opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Active shards %", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 5, + "w": 9, + "x": 15, + "y": 2 + }, + "id": 7, + "maxDataPoints": 100, + "options": { + 
"legend": false, + "showValue": "never" + }, + "pluginVersion": "v10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"data\"}[1m]) == 1\n) * 2\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"master\"}[1m]) == 1\n) * 3\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"ingest\"}[1m]) == 1\n) * 4\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", + "legendFormat": "{{node}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", + "legendFormat": "{{node}}" + } + ], + "title": "Roles timeline", + "type": "status-history" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by OS CPU usage across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + 
"max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 8, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sort_desc(sum by(node, job,opensearch_cluster) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Top nodes by CPU usage", + "type": "bargauge" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The total count of circuit breakers tripped across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "trips" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 9, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by(job,opensearch_cluster, node) 
(increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Breakers tripped", + "type": "bargauge" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Shard status counts across the Opensearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "shards" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 10, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "min by(type, job,opensearch_cluster) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}" + } + ], + "title": "Shard status", + "type": "bargauge" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by disk usage across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "id": 11, + 
"options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.4.3", + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sort_desc((100 * (sum by(node, job,opensearch_cluster) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(node, job,opensearch_cluster) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(node, job,opensearch_cluster) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Top nodes by disk usage", + "type": "bargauge" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The total count of documents indexed across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "documents" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 
8, + "y": 13 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster) (opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Total documents", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of tasks waiting to be executed across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "tasks" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 13 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster) 
(opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Pending tasks", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The total size of the store across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 18 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster) (opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Store size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The max wait time for 
tasks to be executed across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "max by(job,opensearch_cluster) (opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}}/{{opensearch_cluster}}" + } + ], + "title": "Max task wait time", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 16, + "targets": [ ], + "title": "Cluster search and index summary", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": 
{ + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}\n)))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by request rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sort_desc(sum by(index, job,opensearch_cluster) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", 
context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1))))\n", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by request latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * 
(opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by combined cache hit ratio", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by rate of ingest across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 
80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sum by(node, job,opensearch_cluster) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Top nodes by ingest rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by ingestion latency across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ 
+ { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sum by(job,opensearch_cluster, node) (\n increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / \n clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))\n", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Top nodes by ingest latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top nodes by ingestion failures across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "errors" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, sum by(job,opensearch_cluster, node) 
(increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}}" + } + ], + "title": "Top nodes by ingest errors", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top indices by rate of document indexing across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "documents/s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 40 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, avg by(job,opensearch_cluster, index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by index rate", + "type": "timeseries" + }, + { + "datasource": { + 
"uid": "${prometheus_datasource}" + }, + "description": "Top indices by indexing latency across the OpenSearch cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 40 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, avg by(job,opensearch_cluster, index) \n(increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / \nclamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))\n", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by index latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Top indices by index document failures across the OpenSearch 
cluster.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "failures" + }, + "overrides": [ ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 40 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "topk(10, avg by(job,opensearch_cluster, index) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Top indices by index failures", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 36, + "tags": [ + "opensearch-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": 
"label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "OpenSearch cluster overview", + "uid": "opensearch-cluster-overview" +} diff --git a/grafana/dashboards/infrastructure/search-and-index-overview.json b/grafana/dashboards/infrastructure/search-and-index-overview.json new file mode 100644 index 0000000..6e5ce4a --- /dev/null +++ b/grafana/dashboards/infrastructure/search-and-index-overview.json @@ -0,0 +1,2470 @@ +{ + "links": [ + { + "asDropdown": false, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch-mixin" + ], + "title": "Other Opensearch dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "targets": [ ], + "title": "Request performance", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Rate of fetch, scroll, and query requests by selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - query" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - fetch" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "{{index}} - scroll" + } + ], + "title": "Request rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Latency of fetch, scroll, and query requests by selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\"}[$__interval:]) / 
clamp_min(increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - query" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - fetch" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - scroll" + } + ], + "title": "Request latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Ratio of query cache and request cache hits and misses.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - request" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (100 * 
(opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - query" + } + ], + "title": "Cache hit ratio", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Total evictions count by cache type for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "evictions" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 5, + "options": { + "legend": { + 
"calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - query cache" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - request cache" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}} - field data" + } + ], + "title": "Evictions", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 6, + "targets": [ ], + "title": "Index performance", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Rate of indexed documents for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": 
"text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "documents/s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 10 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Index rate", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Document indexing latency for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 10 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]),1))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Index latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Number of indexing failures for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "failures" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 10 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Index failures", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Index flush latency for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 10 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_flush_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Flush latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Index merge time 
for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - total" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_merges_total_stopped_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - stopped" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - throttled" + } + ], + "title": "Merge time", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Index refresh latency for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { 
+ "h": 8, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Refresh latency", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Current number of translog operations for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "operations" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + 
}, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Translog operations", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Rate of documents deleted for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "documents/s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + 
"legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Docs deleted", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${prometheus_datasource}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 15, + "targets": [ ], + "title": "Index capacity", + "type": "row" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Number of indexed documents for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "documents" + }, + "overrides": [ + { + "matcher": { + "id": 
"byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 27 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Documents indexed", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Current number of segments for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "segments" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 
+ } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 27 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Segment count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Number of merge operations for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "merges" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + 
"tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 27 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Merge count", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Size of query cache and request cache.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": 
{ + "h": 8, + "w": 6, + "x": 18, + "y": 27 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - query" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}} - request" + } + ], + "title": "Cache size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Size of the store in bytes for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + 
"unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 35 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Store size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Memory used by segments for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": 
"gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 35 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Segment size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "Size of merge operations in bytes for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + 
"id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 35 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_merges_current_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Merge size", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "description": "The number of index shards for the selected index.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "shards" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + 
"viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 35 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (index) (avg by(job,opensearch_cluster,index) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", type=~\"active|active_primary\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{index}}" + } + ], + "title": "Shard count", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 36, + "tags": [ + "opensearch-mixin" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Index", + "multi": true, + "name": "index", + "query": 
"label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, index)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "default", + "title": "OpenSearch search and index overview", + "uid": "opensearch-search-and-index-overview" +} diff --git a/prometheus/opensearch.rec.rules b/prometheus/opensearch.rec.rules new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/prometheus/opensearch.rec.rules @@ -0,0 +1 @@ +{} diff --git a/prometheus/opensearch.rules b/prometheus/opensearch.rules new file mode 100644 index 0000000..4aa4ade --- /dev/null +++ b/prometheus/opensearch.rules @@ -0,0 +1,121 @@ +"groups": +- "name": "opensearch-alerts" + "rules": + - "alert": "OpenSearchYellowCluster" + "annotations": + "description": "{{$labels.cluster}} health status is yellow over the last 5 minutes" + "summary": "At least one of the clusters is reporting a yellow status." + "expr": | + opensearch_cluster_status{opensearch_cluster!=""} == 1 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchRedCluster" + "annotations": + "description": "{{$labels.cluster}} health status is red over the last 5 minutes" + "summary": "At least one of the clusters is reporting a red status." + "expr": | + opensearch_cluster_status{opensearch_cluster!=""} == 2 + "for": "5m" + "labels": + "severity": "critical" + - "alert": "OpenSearchUnstableShardReallocation" + "annotations": + "description": | + {{$labels.cluster}} has had {{ printf "%.0f" $value }} shard reallocation over the last 1m which is above the threshold of 0. + "summary": "A node has gone offline or has been disconnected triggering shard reallocation." 
+ "expr": | + sum without(type) (opensearch_cluster_shards_number{opensearch_cluster!="", type="relocating"}) > 0 + "for": "1m" + "labels": + "severity": "warning" + - "alert": "OpenSearchUnstableShardUnassigned" + "annotations": + "description": | + {{$labels.cluster}} has had {{ printf "%.0f" $value }} shard unassigned over the last 5m which is above the threshold of 0. + "summary": "There are shards that have been detected as unassigned." + "expr": | + sum without(type) (opensearch_cluster_shards_number{opensearch_cluster!="", type="unassigned"}) > 0 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchHighNodeDiskUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% disk usage over the last 5m which is above the threshold of 60. + "summary": "The node disk usage has exceeded the warning threshold." + "expr": | + 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{opensearch_cluster!=""} - opensearch_fs_path_free_bytes{opensearch_cluster!=""}) / opensearch_fs_path_total_bytes{opensearch_cluster!=""}) > 60 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchHighNodeDiskUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% disk usage over the last 5m which is above the threshold of 80. + "summary": "The node disk usage has exceeded the critical threshold." + "expr": | + 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{opensearch_cluster!=""} - opensearch_fs_path_free_bytes) / opensearch_fs_path_total_bytes{opensearch_cluster!=""}) > 80 + "for": "5m" + "labels": + "severity": "critical" + - "alert": "OpenSearchHighNodeCpuUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% CPU usage over the last 5m which is above the threshold of 70. + "summary": "The node CPU usage has exceeded the warning threshold."
+ "expr": | + sum without(nodeid) (opensearch_os_cpu_percent{opensearch_cluster!=""}) > 70 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchHighNodeCpuUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% CPU usage over the last 5m which is above the threshold of 85. + "summary": "The node CPU usage has exceeded the critical threshold." + "expr": | + sum without(nodeid) (opensearch_os_cpu_percent{opensearch_cluster!=""}) > 85 + "for": "5m" + "labels": + "severity": "critical" + - "alert": "OpenSearchHighNodeMemoryUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% memory usage over the last 5m which is above the threshold of 70. + "summary": "The node memory usage has exceeded the warning threshold." + "expr": | + sum without(nodeid) (opensearch_os_mem_used_percent{opensearch_cluster!=""}) > 70 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchHighNodeMemoryUsage" + "annotations": + "description": | + {{$labels.node}} has had {{ printf "%.0f" $value }}% memory usage over the last 5m which is above the threshold of 85. + "summary": "The node memory usage has exceeded the critical threshold." + "expr": | + sum without(nodeid) (opensearch_os_mem_used_percent{opensearch_cluster!=""}) > 85 + "for": "5m" + "labels": + "severity": "critical" + - "alert": "OpenSearchModerateRequestLatency" + "annotations": + "description": | + {{$labels.index}} has had {{ printf "%.2f" $value }}s of request latency over the last 5m which is above the threshold of 0.5. + "summary": "The request latency has exceeded the warning threshold."
+ "expr": | + sum without(context) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!="", context="total"}[5m])+increase(opensearch_index_search_query_time_seconds{context="total"}[5m])+increase(opensearch_index_search_scroll_time_seconds{context="total"}[5m])) / clamp_min(increase(opensearch_index_search_fetch_count{context="total"}[5m])+increase(opensearch_index_search_query_count{context="total"}[5m])+increase(opensearch_index_search_scroll_count{context="total"}[5m]), 1)) > 0.5 + "for": "5m" + "labels": + "severity": "warning" + - "alert": "OpenSearchModerateIndexLatency" + "annotations": + "description": | + {{$labels.index}} has had {{ printf "%.2f" $value }}s of index latency over the last 5m which is above the threshold of 0.5. + "summary": "The index latency has exceeded the warning threshold." + "expr": | + sum without(context) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!="", context="total"}[5m]) / clamp_min(increase(opensearch_index_indexing_index_count{context="total"}[5m]), 1)) > 0.5 + "for": "5m" + "labels": + "severity": "warning"