diff --git a/assets/rancher-monitoring-crd/rancher-monitoring-crd-104.1.2-rc.1+up57.0.3.tgz b/assets/rancher-monitoring-crd/rancher-monitoring-crd-104.1.2-rc.1+up57.0.3.tgz new file mode 100644 index 0000000000..46fdb1f34e Binary files /dev/null and b/assets/rancher-monitoring-crd/rancher-monitoring-crd-104.1.2-rc.1+up57.0.3.tgz differ diff --git a/assets/rancher-monitoring/rancher-monitoring-104.1.2-rc.1+up57.0.3.tgz b/assets/rancher-monitoring/rancher-monitoring-104.1.2-rc.1+up57.0.3.tgz new file mode 100644 index 0000000000..fde696efa9 Binary files /dev/null and b/assets/rancher-monitoring/rancher-monitoring-104.1.2-rc.1+up57.0.3.tgz differ diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/Chart.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/Chart.yaml new file mode 100644 index 0000000000..11419cb2dd --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/Chart.yaml @@ -0,0 +1,10 @@ +annotations: + catalog.cattle.io/certified: rancher + catalog.cattle.io/hidden: "true" + catalog.cattle.io/namespace: cattle-monitoring-system + catalog.cattle.io/release-name: rancher-monitoring-crd +apiVersion: v2 +description: Installs the CRDs for rancher-monitoring. +name: rancher-monitoring-crd +type: application +version: 104.1.2-rc.1+up57.0.3 diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/README.md b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/README.md new file mode 100644 index 0000000000..e0b63e0268 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/README.md @@ -0,0 +1,24 @@ +# rancher-monitoring-crd +A Rancher chart that installs the CRDs used by rancher-monitoring. + +## How does this chart work? + +This chart marshalls all of the CRD files placed in the `crd-manifest` directory into a ConfigMap that is installed onto a cluster alongside relevant RBAC (ServiceAccount, ClusterRoleBinding, ClusterRole, and PodSecurityPolicy). 
+ +Once the relevant dependent resources are installed / upgraded / rolled back, this chart executes a post-install / post-upgrade / post-rollback Job that: +- Patches any existing versions of the CRDs contained within the `crd-manifest` on the cluster to set `spec.preserveUnknownFields=false`; this step is required since, based on [Kubernetes docs](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#field-pruning) and a [known workaround](https://github.com/kubernetes-sigs/controller-tools/issues/476#issuecomment-691519936), such CRDs cannot be upgraded normally from `apiextensions.k8s.io/v1beta1` to `apiextensions.k8s.io/v1`. +- Runs a `kubectl apply` on the CRDs that are contained within the crd-manifest ConfigMap to upgrade CRDs in the cluster + +On an uninstall, this chart executes a separate pre-delete Job that: +- Patches any existing versions of the CRDs contained within `crd-manifest` on the cluster to set `metadata.finalizers=[]` +- Runs a `kubectl delete` on the CRDs that are contained within the crd-manifest ConfigMap to clean up the CRDs from the cluster + +Note: If the relevant CRDs already existed in the cluster at the time of install, this chart will absorb ownership of the lifecycle of those CRDs; therefore, on a `helm uninstall`, those CRDs will also be removed from the cluster alongside this chart. + +## Why can't we just place the CRDs in the templates/ directory of the main chart? + +In Helm today, you cannot declare a CRD and declare a resource of that CRD's kind in templates/ without encountering a failure on render. + +## [Helm 3] Why can't we just place the CRDs in the crds/ directory of the main chart? + +The Helm 3 `crds/` directory only supports the installation of CRDs, but does not support the upgrade and removal of CRDs, unlike what this chart facilitates. 
\ No newline at end of file diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/files/crd-manifest.tgz b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/files/crd-manifest.tgz new file mode 100644 index 0000000000..d3fd838e1b Binary files /dev/null and b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/files/crd-manifest.tgz differ diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl new file mode 100644 index 0000000000..146bc45a14 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl @@ -0,0 +1,30 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/jobs.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/jobs.yaml new file mode 100644 index 0000000000..6955e3b309 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/jobs.yaml @@ -0,0 +1,102 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Chart.Name }}-create + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }} + annotations: + "helm.sh/hook": post-install, post-upgrade, post-rollback + "helm.sh/hook-delete-policy": 
before-hook-creation, hook-succeeded, hook-failed +spec: + template: + metadata: + name: {{ .Chart.Name }}-create + labels: + app: {{ .Chart.Name }} + spec: + serviceAccountName: {{ .Chart.Name }}-manager + securityContext: + runAsNonRoot: false + runAsUser: 0 + containers: + - name: create-crds + image: {{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - > + echo "Applying CRDs..."; + mkdir -p /etc/crd; + base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd; + kubectl replace -Rf /etc/crd || kubectl create -Rf /etc/crd; + echo "Done!" + volumeMounts: + - name: crd-manifest + readOnly: true + mountPath: /etc/config + restartPolicy: OnFailure + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} + {{- if .Values.nodeSelector }} + {{- toYaml .Values.nodeSelector | nindent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} + {{- if .Values.tolerations }} + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + volumes: + - name: crd-manifest + configMap: + name: {{ .Chart.Name }}-manifest +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Chart.Name }}-delete + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }} + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed +spec: + template: + metadata: + name: {{ .Chart.Name }}-delete + labels: + app: {{ .Chart.Name }} + spec: + serviceAccountName: {{ .Chart.Name }}-manager + securityContext: + runAsNonRoot: false + runAsUser: 0 + containers: + - name: delete-crds + image: {{ template "system_default_registry" . 
}}{{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - > + echo "Deleting CRDs..."; + mkdir -p /etc/crd; + base64 -d /etc/config/crd-manifest.tgz.b64 | tar -xzv -C /etc/crd; + kubectl delete --ignore-not-found=true -Rf /etc/crd; + volumeMounts: + - name: crd-manifest + readOnly: true + mountPath: /etc/config + restartPolicy: OnFailure + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} + {{- if .Values.nodeSelector }} + {{- toYaml .Values.nodeSelector | nindent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} + {{- if .Values.tolerations }} + {{- toYaml .Values.tolerations | nindent 8 }} + {{- end }} + volumes: + - name: crd-manifest + configMap: + name: {{ .Chart.Name }}-manifest diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/manifest.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/manifest.yaml new file mode 100644 index 0000000000..8dc9dfb447 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/manifest.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Chart.Name }}-manifest + namespace: {{ .Release.Namespace }} +data: + crd-manifest.tgz.b64: + {{- .Files.Get "files/crd-manifest.tgz" | b64enc | indent 4 }} diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/rbac.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/rbac.yaml new file mode 100644 index 0000000000..a4d498b0fa --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/rbac.yaml @@ -0,0 +1,76 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ .Chart.Name }}-manager + labels: + app: {{ .Chart.Name }}-manager +rules: +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: ['create', 'get', 'patch', 'delete', 'update', 'list'] +{{- if 
.Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ .Chart.Name }}-manager +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ .Chart.Name }}-manager + labels: + app: {{ .Chart.Name }}-manager +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Chart.Name }}-manager +subjects: +- kind: ServiceAccount + name: {{ .Chart.Name }}-manager + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Chart.Name }}-manager + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }}-manager +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ .Chart.Name }}-manager + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }}-manager +spec: + privileged: false + allowPrivilegeEscalation: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'configMap' + - 'secret' +{{- end }} diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API 
resource. Please disable PSPs in this chart before proceeding." -}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/values.yaml b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/values.yaml new file mode 100644 index 0000000000..99e63600c4 --- /dev/null +++ b/charts/rancher-monitoring-crd/104.1.2-rc.1+up57.0.3/values.yaml @@ -0,0 +1,17 @@ +# Default values for rancher-monitoring-crd. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + +image: + repository: rancher/shell + tag: v0.2.1 + +nodeSelector: {} + +tolerations: [] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.editorconfig b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.editorconfig new file mode 100644 index 0000000000..f5ee2f4610 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.editorconfig @@ -0,0 +1,5 @@ +root = true + +[files/dashboards/*.json] +indent_size = 2 +indent_style = space \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.helmignore new file mode 100644 index 0000000000..9bdbec92b4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/.helmignore @@ -0,0 +1,29 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +# helm/charts +OWNERS +hack/ +ci/ +kube-prometheus-*.tgz + +unittests/ +files/dashboards/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CHANGELOG.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CHANGELOG.md new file mode 100644 index 0000000000..8178169b91 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CHANGELOG.md @@ -0,0 +1,47 @@ +# Changelog +All notable changes from the upstream Prometheus Operator chart will be added to this file. + +## [Package Version 00] - 2020-07-19 +### Added +- Added [Prometheus Adapter](https://github.com/helm/charts/tree/master/stable/prometheus-adapter) as a dependency to the upstream Prometheus Operator chart to allow users to expose custom metrics from the default Prometheus instance deployed by this chart +- Remove `prometheus-operator/cleanup-crds.yaml` and `prometheus-operator/crds.yaml` from the Prometheus Operator upstream chart in favor of just using the CRD directory to install the CRDs. +- Added support for `rkeControllerManager`, `rkeScheduler`, `rkeProxy`, and `rkeEtcd` PushProx exporters for monitoring k8s components within RKE clusters +- Added support for a `k3sServer` PushProx exporter that monitors k3s server components (`kubeControllerManager`, `kubeScheduler`, and `kubeProxy`) within k3s clusters +- Added support for `kubeAdmControllerManager`, `kubeAdmScheduler`, `kubeAdmProxy`, and `kubeAdmEtcd` PushProx exporters for monitoring k8s components within kubeAdm clusters +- Added support for `rke2ControllerManager`, `rke2Scheduler`, `rke2Proxy`, and `rke2Etcd` PushProx exporters for monitoring k8s components within rke2 clusters +- Exposed `prometheus.prometheusSpec.ignoreNamespaceSelectors` on values.yaml and set it to `false` by default. 
This value instructs the default Prometheus server deployed with this chart to ignore the `namespaceSelector` field within any created ServiceMonitor or PodMonitor CRs that it selects. This prevents ServiceMonitors and PodMonitors from configuring the Prometheus scrape configuration to monitor resources outside the namespace that they are deployed in; if a user needs to have one ServiceMonitor / PodMonitor monitor resources within several namespaces (such as the resources that are used to monitor Istio in a default installation), they should not enable this option since it would require them to create one ServiceMonitor / PodMonitor CR per namespace that they would like to monitor. Relevant fields were also updated in the default README.md. +- Added `grafana.sidecar.dashboards.searchNamespace` to `values.yaml` with a default value of `cattle-dashboards`. The namespace provided should contain all ConfigMaps with the label `grafana_dashboard` and will be searched by the Grafana Dashboards sidecar for updates. The namespace specified is also created along with this deployment. All default dashboard ConfigMaps have been relocated from the deployment namespace to the namespace specified +- Added `monitoring-admin`, `monitoring-edit`, and `monitoring-view` default `ClusterRoles` to allow admins to assign roles to users to interact with Prometheus Operator CRs. These can be enabled by setting `.Values.global.rbac.userRoles.create` (default: `true`). In a typical RBAC setup, you might want to use a `ClusterRoleBinding` to bind these roles to a Subject to allow them to set up or view `ServiceMonitors` / `PodMonitors` / `PrometheusRules` and view `Prometheus` or `Alertmanager` CRs across the cluster. 
If `.Values.global.rbac.userRoles.aggregateRolesForRBAC` is enabled, these ClusterRoles will aggregate into the respective default ClusterRoles provided by Kubernetes +- Added `monitoring-config-admin`, `monitoring-config-edit` and `monitoring-config-view` default `Roles` to allow admins to assign roles to users to be able to edit / view `Secrets` and `ConfigMaps` within the `cattle-monitoring-system` namespace. These can be enabled by setting `.Values.global.rbac.userRoles.create` (default: `true`). In a typical RBAC setup, you might want to use a `RoleBinding` to bind these roles to a Subject within the `cattle-monitoring-system` namespace to allow them to modify Secrets / ConfigMaps tied to the deployment, such as your Alertmanager Config Secret. +- Added `monitoring-dashboard-admin`, `monitoring-dashboard-edit` and `monitoring-dashboard-view` default `Roles` to allow admins to assign roles to users to be able to edit / view `ConfigMaps` within the `cattle-dashboards` namespace. These can be enabled by setting `.Values.global.rbac.userRoles.create` (default: `true`) and deploying Grafana as part of this chart. In a typical RBAC setup, you might want to use a `RoleBinding` to bind these roles to a Subject within the `cattle-dashboards` namespace to allow them to create / modify ConfigMaps that contain the JSON used to persist Grafana Dashboards on the cluster. +- Added default resource limits for `Prometheus Operator`, `Prometheus`, `AlertManager`, `Grafana`, `kube-state-metrics`, `node-exporter` +- Added a default template `rancher_defaults.tmpl` to AlertManager that Rancher will offer to users in order to help configure the way alerts are rendered on a notifier. Also updated the default template deployed with this chart to reference that template and added an example of a Slack config using this template as a comment in the `values.yaml`. 
+- Added support for private registries via introducing a new field for `global.cattle.systemDefaultRegistry` that, if supplied, will automatically be prepended onto every image used by the chart. +- Added a default `nginx` proxy container deployed with Grafana whose config is set in the `ConfigMap` located in `charts/grafana/templates/nginx-config.yaml`. The purpose of this container is to make it possible to view Grafana's UI through a proxy that has a subpath (e.g. Rancher's proxy). This proxy container is set to listen on port `8080` (with a `portName` of `nginx-http` instead of the default `service`), which is also where the Grafana service will now point to, and will forward all requests to the Grafana container listening on the default port `3000`. +- Added a default `nginx` proxy container deployed with Prometheus whose config is set in the `ConfigMap` located in `templates/prometheus/nginx-config.yaml`. The purpose of this container is to make it possible to view Prometheus's UI through a proxy that has a subpath (e.g. Rancher's proxy). This proxy container is set to listen on port `8081` (with a `portName` of `nginx-http` instead of the default `web`), which is also where the Prometheus service will now point to, and will forward all requests to the Prometheus container listening on the default port `9090`. +- Added support for passing CIS Scans in a hardened cluster by introducing a Job that patches the default service account within the `cattle-monitoring-system` and `cattle-dashboards` namespaces on install or upgrade and adding a default allow all `NetworkPolicy` to the `cattle-monitoring-system` and `cattle-dashboards` namespaces. +### Modified +- Updated the chart name from `prometheus-operator` to `rancher-monitoring` and added the `io.rancher.certified: rancher` annotation to `Chart.yaml` +- Modified the default `node-exporter` port from `9100` to `9796` +- Modified the default `nameOverride` to `rancher-monitoring`. 
This change is necessary as the Prometheus Adapter's default URL (`http://{{ .Values.nameOverride }}-prometheus.{{ .Values.namespaceOverride }}.svc`) is based off of the value used here; if modified, the default Adapter URL must also be modified +- Modified the default `namespaceOverride` to `cattle-monitoring-system`. This change is necessary as the Prometheus Adapter's default URL (`http://{{ .Values.nameOverride }}-prometheus.{{ .Values.namespaceOverride }}.svc`) is based off of the value used here; if modified, the default Adapter URL must also be modified +- Configured some default values for `grafana.service` values and exposed them in the default README.md +- The default namespaces of the following ServiceMonitors were changed from the deployment namespace to allow them to continue to monitor metrics when `prometheus.prometheusSpec.ignoreNamespaceSelectors` is enabled: + - `core-dns`: `kube-system` + - `api-server`: `default` + - `kube-controller-manager`: `kube-system` + - `kubelet`: `{{ .Values.kubelet.namespace }}` +- Disabled the following deployments by default (can be enabled if required): + - `AlertManager` + - `kube-controller-manager` metrics exporter + - `kube-etcd` metrics exporter + - `kube-scheduler` metrics exporter + - `kube-proxy` metrics exporter +- Updated default Grafana `deploymentStrategy` to `Recreate` to prevent deployments from being stuck on upgrade if a PV is attached to Grafana +- Modified the default `SelectorNilUsesHelmValues` to default to `false`. As a result, we look for all CRs with any labels in all namespaces by default rather than just the ones tagged with the label `release: rancher-monitoring`. +- Modified the default images used by the `rancher-monitoring` chart to point to Rancher mirrors of the original images from upstream. +- Modified the behavior of the chart to create the Alertmanager Config Secret via a pre-install hook instead of using the normal Helm lifecycle to manage the secret. 
The benefit of this approach is that all changes to the Config Secret done on a live cluster will never get overridden on a `helm upgrade` since the secret only gets created on a `helm install`. If you would like the secret to be cleaned up on a `helm uninstall`, enable `alertmanager.cleanupOnUninstall`; however, this is disabled by default to prevent the loss of alerting configuration on an uninstall. This secret will never be modified on a `helm upgrade`. +- Modified the default `securityContext` for `Pod` templates across the chart to `{"runAsNonRoot": "true", "runAsUser": "1000"}` and replaced `grafana.rbac.pspUseAppArmor` with `grafana.rbac.pspAnnotations={}` in order to make it possible to deploy this chart on a hardened cluster which does not support Seccomp or AppArmor annotations in PSPs. Users can always choose to specify the annotations they want to use for the PSP directly as part of the values provided. +- Modified `.Values.prometheus.prometheusSpec.containers` to take in a string representing a template that should be rendered by Helm (via `tpl`) instead of allowing a user to provide YAML directly. +- Modified the default Grafana configuration to auto assign users who access Grafana to the Viewer role and enable anonymous access to Grafana dashboards by default. This default works well for a Rancher user who is accessing Grafana via the `kubectl proxy` on the Rancher Dashboard UI since anonymous users who enter via the proxy are authenticated by the k8s API Server, but you can / should modify this behavior if you plan on exposing Grafana in a way that does not require authentication (e.g. as a `NodePort` service). +- Modified the default Grafana configuration to add a default dashboard for Rancher on the Grafana home page. 
\ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CONTRIBUTING.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CONTRIBUTING.md new file mode 100644 index 0000000000..f6ce2a3235 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/CONTRIBUTING.md @@ -0,0 +1,12 @@ +# Contributing Guidelines + +## How to contribute to this chart + +1. Fork this repository, develop and test your Chart. +1. Bump the chart version for every change. +1. Ensure PR title has the prefix `[kube-prometheus-stack]` +1. When making changes to rules or dashboards, see the README.md section on how to sync data from upstream repositories +1. Check the `hack/minikube` folder has scripts to set up minikube and components of this chart that will allow all components to be scraped. You can use this configuration when validating your changes. +1. Check for changes of RBAC rules. +1. Check for changes in CRD specs. +1. PR must pass the linter (`helm lint`) diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/Chart.yaml new file mode 100644 index 0000000000..2aa440a1ca --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/Chart.yaml @@ -0,0 +1,126 @@ +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts + - name: Upstream Project + url: https://github.com/prometheus-operator/kube-prometheus + artifacthub.io/operator: "true" + catalog.cattle.io/auto-install: rancher-monitoring-crd=match + catalog.cattle.io/certified: rancher + catalog.cattle.io/deploys-on-os: windows + catalog.cattle.io/display-name: Monitoring + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/namespace: cattle-monitoring-system + catalog.cattle.io/permits-os: linux,windows + catalog.cattle.io/provides-gvr: monitoring.coreos.com.prometheus/v1 + 
catalog.cattle.io/rancher-version: '>= 2.9.0-0 < 2.10.0-0' + catalog.cattle.io/release-name: rancher-monitoring + catalog.cattle.io/requests-cpu: 4500m + catalog.cattle.io/requests-memory: 4000Mi + catalog.cattle.io/type: cluster-tool + catalog.cattle.io/ui-component: monitoring + catalog.cattle.io/upstream-version: 57.0.3 +apiVersion: v2 +appVersion: v0.72.0 +dependencies: +- condition: grafana.enabled + name: grafana + repository: file://./charts/grafana +- condition: hardenedKubelet.enabled + name: hardenedKubelet + repository: file://./charts/hardenedKubelet +- condition: hardenedNodeExporter.enabled + name: hardenedNodeExporter + repository: file://./charts/hardenedNodeExporter +- condition: k3sServer.enabled + name: k3sServer + repository: file://./charts/k3sServer +- condition: kubeStateMetrics.enabled + name: kube-state-metrics + repository: file://./charts/kube-state-metrics +- condition: kubeAdmControllerManager.enabled + name: kubeAdmControllerManager + repository: file://./charts/kubeAdmControllerManager +- condition: kubeAdmEtcd.enabled + name: kubeAdmEtcd + repository: file://./charts/kubeAdmEtcd +- condition: kubeAdmProxy.enabled + name: kubeAdmProxy + repository: file://./charts/kubeAdmProxy +- condition: kubeAdmScheduler.enabled + name: kubeAdmScheduler + repository: file://./charts/kubeAdmScheduler +- condition: prometheus-adapter.enabled + name: prometheus-adapter + repository: file://./charts/prometheus-adapter +- condition: nodeExporter.enabled + name: prometheus-node-exporter + repository: file://./charts/prometheus-node-exporter +- condition: rke2ControllerManager.enabled + name: rke2ControllerManager + repository: file://./charts/rke2ControllerManager +- condition: rke2Etcd.enabled + name: rke2Etcd + repository: file://./charts/rke2Etcd +- condition: rke2IngressNginx.enabled + name: rke2IngressNginx + repository: file://./charts/rke2IngressNginx +- condition: rke2Proxy.enabled + name: rke2Proxy + repository: file://./charts/rke2Proxy +- 
condition: rke2Scheduler.enabled + name: rke2Scheduler + repository: file://./charts/rke2Scheduler +- condition: rkeControllerManager.enabled + name: rkeControllerManager + repository: file://./charts/rkeControllerManager +- condition: rkeEtcd.enabled + name: rkeEtcd + repository: file://./charts/rkeEtcd +- condition: rkeIngressNginx.enabled + name: rkeIngressNginx + repository: file://./charts/rkeIngressNginx +- condition: rkeProxy.enabled + name: rkeProxy + repository: file://./charts/rkeProxy +- condition: rkeScheduler.enabled + name: rkeScheduler + repository: file://./charts/rkeScheduler +- condition: windowsExporter.enabled + name: windowsExporter + repository: file://./charts/windowsExporter +description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, + and Prometheus rules combined with documentation and scripts to provide easy to + operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus + Operator. +home: https://github.com/prometheus-operator/kube-prometheus +icon: file://assets/logos/rancher-monitoring.png +keywords: +- operator +- prometheus +- kube-prometheus +kubeVersion: '>=1.19.0-0' +maintainers: +- email: andrew@quadcorps.co.uk + name: andrewgkew +- email: gianrubio@gmail.com + name: gianrubio +- email: github.gkarthiks@gmail.com + name: gkarthiks +- email: kube-prometheus-stack@sisti.pt + name: GMartinez-Sisti +- email: github@jkroepke.de + name: jkroepke +- email: scott@r6by.com + name: scottrigby +- email: miroslav.hadzhiev@gmail.com + name: Xtigyro +- email: quentin.bisson@gmail.com + name: QuentinBisson +name: rancher-monitoring +sources: +- https://github.com/prometheus-community/helm-charts +- https://github.com/prometheus-operator/kube-prometheus +type: application +version: 104.1.2-rc.1+up57.0.3 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/README.md new file mode 100644 index 0000000000..9baf58bb16 --- 
/dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/README.md @@ -0,0 +1,1080 @@ +# kube-prometheus-stack + +Installs the [kube-prometheus stack](https://github.com/prometheus-operator/kube-prometheus), a collection of Kubernetes manifests, [Grafana](http://grafana.com/) dashboards, and [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with [Prometheus](https://prometheus.io/) using the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). + +See the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) README for details about components, dashboards, and alerts. + +_Note: This chart was formerly named `prometheus-operator` chart, now renamed to more clearly reflect that it installs the `kube-prometheus` project stack, within which Prometheus Operator is only one component._ + +## Prerequisites + +- Kubernetes 1.19+ +- Helm 3+ + +## Get Helm Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Helm Chart + +```console +helm install [RELEASE_NAME] prometheus-community/kube-prometheus-stack +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Dependencies + +By default this chart installs additional, dependent charts: + +- [prometheus-community/kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) +- [prometheus-community/prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter) +- 
[grafana/grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana) + +To disable dependencies during installation, see [multiple releases](#multiple-releases) below. + +_See [helm dependency](https://helm.sh/docs/helm/helm_dependency/) for command documentation._ + +## Uninstall Helm Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +CRDs created by this chart are not removed by default and should be manually cleaned up: + +```console +kubectl delete crd alertmanagerconfigs.monitoring.coreos.com +kubectl delete crd alertmanagers.monitoring.coreos.com +kubectl delete crd podmonitors.monitoring.coreos.com +kubectl delete crd probes.monitoring.coreos.com +kubectl delete crd prometheusagents.monitoring.coreos.com +kubectl delete crd prometheuses.monitoring.coreos.com +kubectl delete crd prometheusrules.monitoring.coreos.com +kubectl delete crd scrapeconfigs.monitoring.coreos.com +kubectl delete crd servicemonitors.monitoring.coreos.com +kubectl delete crd thanosrulers.monitoring.coreos.com +``` + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/kube-prometheus-stack +``` + +With Helm v3, CRDs created by this chart are not updated by default and should be manually updated. +Consult also the [Helm Documentation on CRDs](https://helm.sh/docs/chart_best_practices/custom_resource_definitions). + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### Upgrading an existing Release to a new major version + +A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. + +### From 56.x to 57.x + +This version upgrades Prometheus-Operator to v0.72.0 + +Run these commands to update the CRDs before applying the upgrade. 
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.72.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 55.x to 56.x + +This version upgrades Prometheus-Operator to v0.71.0, Prometheus to 2.49.1 + 
+Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.71.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 54.x to 55.x + +This version 
upgrades Prometheus-Operator to v0.70.0 + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.70.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml 
+``` + +### From 53.x to 54.x + +Grafana Helm Chart has bumped to version 7 + +Please note Grafana Helm Chart [changelog](https://github.com/grafana/helm-charts/tree/main/charts/grafana#to-700). + +### From 52.x to 53.x + +This version upgrades Prometheus-Operator to v0.69.1, Prometheus to 2.47.2 + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f 
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.69.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 51.x to 52.x + +This includes the ability to select between using existing secrets or create new secret objects for various thanos config. The defaults have not changed but if you were setting: + +- `thanosRuler.thanosRulerSpec.alertmanagersConfig` or +- `thanosRuler.thanosRulerSpec.objectStorageConfig` or +- `thanosRuler.thanosRulerSpec.queryConfig` or +- `prometheus.prometheusSpec.thanos.objectStorageConfig` + +you will need to set `existingSecret` or `secret` based on your requirements. + +For instance, the `thanosRuler.thanosRulerSpec.alertmanagersConfig` used to be configured as follows: + +```yaml +thanosRuler: + thanosRulerSpec: + alertmanagersConfig: + alertmanagers: + - api_version: v2 + http_config: + basic_auth: + username: some_user + password: some_pass + static_configs: + - alertmanager.thanos.io + scheme: http + timeout: 10s +``` + +But it has now moved to: + +```yaml +thanosRuler: + thanosRulerSpec: + alertmanagersConfig: + secret: + alertmanagers: + - api_version: v2 + http_config: + basic_auth: + username: some_user + password: some_pass + static_configs: + - alertmanager.thanos.io + scheme: http + timeout: 10s +``` + +or the `thanosRuler.thanosRulerSpec.objectStorageConfig` used to be configured as follows: + +```yaml +thanosRuler: + thanosRulerSpec: + objectStorageConfig: + name: existing-secret-not-created-by-this-chart + key: object-storage-configs.yaml +``` + +But it has now moved to: + +```yaml +thanosRuler: + thanosRulerSpec: + objectStorageConfig: + existingSecret: + name: existing-secret-not-created-by-this-chart + key: object-storage-configs.yaml +``` + +### From 50.x to 51.x + +This
version upgrades Prometheus-Operator to v0.68.0, Prometheus to 2.47.0 and Thanos to v0.32.2 + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f 
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.68.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 49.x to 50.x + +This version requires Kubernetes 1.19+. + +We do not expect any breaking changes in this version. + +### From 48.x to 49.x + +This version upgrades Prometheus-Operator to v0.67.1, Alertmanager to v0.26.0, Prometheus to 2.46.0 and Thanos to v0.32.0 + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.67.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 47.x to 48.x + +This version moved all CRDs into a dedicated sub-chart. No new CRDs are introduced in this version. +See [#3548](https://github.com/prometheus-community/helm-charts/issues/3548) for more context. + +We do not expect any breaking changes in this version. + +### From 46.x to 47.x + +This version upgrades Prometheus-Operator to v0.66.0 with new CRDs (PrometheusAgent and ScrapeConfig). + +Run these commands to update the CRDs before applying the upgrade. 
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.66.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 45.x to 46.x + +This version upgrades Prometheus-Operator to v0.65.1 with new CRDs 
(PrometheusAgent and ScrapeConfig), Prometheus to v2.44.0 and Thanos to v0.31.0. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f 
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 44.x to 45.x + +This version upgrades Prometheus-Operator to v0.63.0, Prometheus to v2.42.0 and Thanos to v0.30.2. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.63.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 43.x to 44.x + +This version upgrades Prometheus-Operator to v0.62.0, Prometheus to v2.41.0 and Thanos to v0.30.1. 
+ +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.62.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +If you have explicitly set `prometheusOperator.admissionWebhooks.failurePolicy`, this value is now always used even when `.prometheusOperator.admissionWebhooks.patch.enabled` is `true` (the default). + +The values for `prometheusOperator.image.tag` & `prometheusOperator.prometheusConfigReloader.image.tag` are now empty by default and the Chart.yaml `appVersion` field is used instead. 
+ +### From 42.x to 43.x + +This version upgrades Prometheus-Operator to v0.61.1, Prometheus to v2.40.5 and Thanos to v0.29.0. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.61.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 41.x to 42.x + +This includes the overridability of container registry for all containers at the global level using `global.imageRegistry` or per container image. 
The defaults have not changed but if you were using a custom image, you will have to override the registry of said custom container image before you upgrade. + +For instance, the prometheus-config-reloader used to be configured as follows: + +```yaml + image: + repository: quay.io/prometheus-operator/prometheus-config-reloader + tag: v0.60.1 + sha: "" +``` + +But it has now moved to: + +```yaml + image: + registry: quay.io + repository: prometheus-operator/prometheus-config-reloader + tag: v0.60.1 + sha: "" +``` + +### From 40.x to 41.x + +This version upgrades Prometheus-Operator to v0.60.1, Prometheus to v2.39.1 and Thanos to v0.28.1. +This version also upgrades the Helm charts of kube-state-metrics to 4.20.2, prometheus-node-exporter to 4.3.0 and Grafana to 6.40.4. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply
--server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.60.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +This version splits kubeScheduler recording and alerting rules into separate config values. +Instead of `defaultRules.rules.kubeScheduler` the 2 new variables `defaultRules.rules.kubeSchedulerAlerting` and `defaultRules.rules.kubeSchedulerRecording` are used. + +### From 39.x to 40.x + +This version upgrades Prometheus-Operator to v0.59.1, Prometheus to v2.38.0, kube-state-metrics to v2.6.0 and Thanos to v0.28.0. +This version also upgrades the Helm charts of kube-state-metrics to 4.18.0 and prometheus-node-exporter to 4.2.0. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f
https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.59.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +Starting from prometheus-node-exporter version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade. + +```console +kubectl delete daemonset -l app=prometheus-node-exporter +helm upgrade -i kube-prometheus-stack prometheus-community/kube-prometheus-stack +``` + +If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels. + +### From 38.x to 39.x + +This upgraded prometheus-operator to v0.58.0 and prometheus to v2.37.0 + +Run these commands to update the CRDs before applying the upgrade. 
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.58.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 37.x to 38.x + +Reverted one of the default metrics relabelings for cAdvisor added in 36.x, due to it breaking container_network_* and various other statistics. If you do not want this change, you will need to override the `kubelet.cAdvisorMetricRelabelings`. + +### From 36.x to 37.x + +This includes some default metric relabelings for cAdvisor and apiserver metrics to reduce cardinality. 
If you do not want these defaults, you will need to override the `kubeApiServer.metricRelabelings` and/or `kubelet.cAdvisorMetricRelabelings`. + +### From 35.x to 36.x + +This upgraded prometheus-operator to v0.57.0 and prometheus to v2.36.1. + +Run these commands to update the CRDs before applying the upgrade. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.57.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 34.x to 35.x + +This upgraded prometheus-operator to v0.56.0 and prometheus to v2.35.0. + +Run these commands to update the CRDs before applying the upgrade.
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.56.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 33.x to 34.x + +This upgrades prometheus-operator to v0.55.0 and prometheus to v2.33.5. + +Run these commands to update the CRDs before applying the upgrade.
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.55.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 32.x to 33.x + +This upgrades the prometheus-node-exporter Chart to v3.0.0. Please review the changes to this subchart if you make customizations to hostMountPropagation. + +### From 31.x to 32.x + +This upgrades prometheus-operator to v0.54.0 and prometheus to v2.33.1. It also changes the default for `grafana.serviceMonitor.enabled` to `true`. + +Run these commands to update the CRDs before applying the upgrade.
+ +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.54.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 30.x to 31.x + +This version removes the built-in grafana ServiceMonitor and instead relies on the ServiceMonitor of the sub-chart. +`grafana.serviceMonitor.enabled` must be set instead of `grafana.serviceMonitor.selfMonitor` and the old ServiceMonitor may +need to be manually cleaned up after deploying the new release. 
+ +### From 29.x to 30.x + +This version updates kube-state-metrics to 4.3.0 and uses the new option `kube-state-metrics.releaseLabel=true` which adds the "release" label to kube-state-metrics labels, making scraping of the metrics by kube-prometheus-stack work out of the box again, independent of the used kube-prometheus-stack release name. If you already set the "release" label via `kube-state-metrics.customLabels` you might have to remove that and use it via the new option. + +### From 28.x to 29.x + +This version makes the scraping port for kube-controller-manager and kube-scheduler dynamic to reflect changes to default serving ports +for those components in Kubernetes versions v1.22 and v1.23 respectively. + +If you deploy on clusters using version v1.22+, kube-controller-manager will be scraped over HTTPS on port 10257. + +If you deploy on clusters running version v1.23+, kube-scheduler will be scraped over HTTPS on port 10259. + +### From 27.x to 28.x + +This version disables PodSecurityPolicies by default because they are deprecated in Kubernetes 1.21 and will be removed in Kubernetes 1.25. + +If you are using PodSecurityPolicies you can enable the previous behaviour by setting `kube-state-metrics.podSecurityPolicy.enabled`, `prometheus-node-exporter.rbac.pspEnabled`, `grafana.rbac.pspEnabled` and `global.rbac.pspEnabled` to `true`. + +### From 26.x to 27.x + +This version splits prometheus-node-exporter chart recording and alerting rules into separate config values. +Instead of `defaultRules.rules.node` the 2 new variables `defaultRules.rules.nodeExporterAlerting` and `defaultRules.rules.nodeExporterRecording` are used. + +Also the following defaultRules.rules have been removed as they had no effect: `kubeApiserverError`, `kubePrometheusNodeAlerting`, `kubernetesAbsent`, `time`. + +The ability to set a runbookUrl via `defaultRules.runbookUrl` was reintroduced.
+ +### From 25.x to 26.x + +This version enables the prometheus-node-exporter subchart servicemonitor by default again, by setting `prometheus-node-exporter.prometheus.monitor.enabled` to `true`. + +### From 24.x to 25.x + +This version upgrades to prometheus-operator v0.53.1. It removes support for setting a runbookUrl, since the upstream format for runbooks changed. + +```console +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.53.1/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 23.x to 24.x + +The custom `ServiceMonitor` for the _kube-state-metrics_ & _prometheus-node-exporter_
charts have been removed in favour of the built-in sub-chart `ServiceMonitor`; for both sub-charts this means that `ServiceMonitor` customisations happen via the values passed to the chart. If you haven't directly customised this behaviour then there are no changes required to upgrade, but if you have please read the following. + +For _kube-state-metrics_ the `ServiceMonitor` customisation is now set via `kube-state-metrics.prometheus.monitor` and the `kubeStateMetrics.serviceMonitor.selfMonitor.enabled` value has moved to `kube-state-metrics.selfMonitor.enabled`. + +For _prometheus-node-exporter_ the `ServiceMonitor` customisation is now set via `prometheus-node-exporter.prometheus.monitor` and the `nodeExporter.jobLabel` value has moved to `prometheus-node-exporter.prometheus.monitor.jobLabel`. + +### From 22.x to 23.x + +Port names have been renamed for Istio's +[explicit protocol selection](https://istio.io/latest/docs/ops/configuration/traffic-management/protocol-selection/#explicit-protocol-selection). + +| | old value | new value | +|-|-----------|-----------| +| `alertmanager.alertmanagerSpec.portName` | `web` | `http-web` | +| `grafana.service.portName` | `service` | `http-web` | +| `prometheus-node-exporter.service.portName` | `metrics` (hardcoded) | `http-metrics` | +| `prometheus.prometheusSpec.portName` | `web` | `http-web` | + +### From 21.x to 22.x + +Due to the upgrade of the `kube-state-metrics` chart, removal of its deployment/statefulset needs to be done manually prior to upgrading: + +```console +kubectl delete deployments.apps -l app.kubernetes.io/instance=prometheus-operator,app.kubernetes.io/name=kube-state-metrics --cascade=orphan +``` + +or if you use autosharding: + +```console +kubectl delete statefulsets.apps -l app.kubernetes.io/instance=prometheus-operator,app.kubernetes.io/name=kube-state-metrics --cascade=orphan +``` + +### From 20.x to 21.x + +The config reloader values have been refactored.
All the values have been moved to the key `prometheusConfigReloader` and the limits and requests can now be set separately. + +### From 19.x to 20.x + +Version 20 upgrades prometheus-operator from 0.50.x to 0.52.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.52.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 18.x to 19.x + +`kubeStateMetrics.serviceMonitor.namespaceOverride` was removed. +Please use `kube-state-metrics.namespaceOverride` instead. + +### From 17.x to 18.x + +Version 18 upgrades prometheus-operator from 0.49.x to 0.50.x. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.50.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 16.x to 17.x + +Version 17 upgrades prometheus-operator from 0.48.x to 0.49.x. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.49.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 15.x to 16.x + +Version 16 upgrades kube-state-metrics to v2.0.0. This includes changed command-line arguments and removed metrics, see this [blog post](https://kubernetes.io/blog/2021/04/13/kube-state-metrics-v-2-0/). This version also removes Grafana dashboards that supported Kubernetes 1.14 or earlier. + +### From 14.x to 15.x + +Version 15 upgrades prometheus-operator from 0.46.x to 0.47.x. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.47.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 13.x to 14.x + +Version 14 upgrades prometheus-operator from 0.45.x to 0.46.x. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRDs manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.46.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml +``` + +### From 12.x to 13.x + +Version 13 upgrades prometheus-operator from 0.44.x to 0.45.x. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRD manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.45.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml +``` + +### From 11.x to 12.x + +Version 12 upgrades prometheus-operator from 0.43.x to 0.44.x. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRD manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.44/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +``` + +The chart was migrated to support only helm v3 and later. + +### From 10.x to 11.x + +Version 11 upgrades prometheus-operator from 0.42.x to 0.43.x. Starting with 0.43.x an additional `AlertmanagerConfigs` CRD is introduced. Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRD manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.43/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml +``` + +Version 11 removes the deprecated tlsProxy via ghostunnel in favor of native TLS support the prometheus-operator gained with v0.39.0. + +### From 9.x to 10.x + +Version 10 upgrades prometheus-operator from 0.38.x to 0.42.x. Starting with 0.40.x an additional `Probes` CRD is introduced. 
Helm does not automatically upgrade or install new CRDs on a chart upgrade, so you have to install the CRD manually before updating: + +```console +kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.42/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml +``` + +### From 8.x to 9.x + +Version 9 of the helm chart removes the existing `additionalScrapeConfigsExternal` in favour of `additionalScrapeConfigsSecret`. This change lets users specify the secret name and secret key to use for the additional scrape configuration of prometheus. This is useful for users that have prometheus-operator as a subchart and also have a template that creates the additional scrape configuration. + +### From 7.x to 8.x + +Due to new template functions being used in the rules in version 8.x.x of the chart, an upgrade to Prometheus Operator and Prometheus is necessary in order to support them. First, upgrade to the latest version of 7.x.x + +```console +helm upgrade [RELEASE_NAME] prometheus-community/kube-prometheus-stack --version 7.5.0 +``` + +Then upgrade to 8.x.x + +```console +helm upgrade [RELEASE_NAME] prometheus-community/kube-prometheus-stack --version [8.x.x] +``` + +Minimal recommended Prometheus version for this chart release is `2.12.x` + +### From 6.x to 7.x + +Due to a change in grafana subchart, version 7.x.x now requires Helm >= 2.12.0. + +### From 5.x to 6.x + +Due to a change in deployment labels of kube-state-metrics, the upgrade requires `helm upgrade --force` in order to re-create the deployment. 
If this is not done an error will occur indicating that the deployment cannot be modified: + +```console +invalid: spec.selector: Invalid value: v1.LabelSelector{MatchLabels:map[string]string{"app.kubernetes.io/name":"kube-state-metrics"}, MatchExpressions:[]v1.LabelSelectorRequirement(nil)}: field is immutable +``` + +If this error has already been encountered, a `helm history` command can be used to determine which release has worked, then `helm rollback` to the release, then `helm upgrade --force` to this new one + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments: + +```console +helm show values prometheus-community/kube-prometheus-stack +``` + +You may also run `helm show values` on this chart's [dependencies](#dependencies) for additional options. + +### Rancher Monitoring Configuration + +The following table shows values exposed by Rancher Monitoring's additions to the chart: + +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `nameOverride` | Provide a name that should be used instead of the chart name when naming all resources deployed by this chart |`"rancher-monitoring"`| +| `namespaceOverride` | Override the deployment namespace | `"cattle-monitoring-system"` | +| `global.rbac.userRoles.create` | Create default user ClusterRoles to allow users to interact with Prometheus CRs, ConfigMaps, and Secrets | `true` | +| `global.rbac.userRoles.aggregateToDefaultRoles` | Aggregate default user ClusterRoles into default k8s ClusterRoles | `true` | +| `prometheus-adapter.enabled` | Whether to install [prometheus-adapter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter) within the cluster | `true` | +| `prometheus-adapter.prometheus.url` | A URL pointing to the Prometheus deployment within your cluster. 
The default value is set based on the assumption that you plan to deploy the default Prometheus instance from this chart where `.Values.namespaceOverride=cattle-monitoring-system` and `.Values.nameOverride=rancher-monitoring` | `http://rancher-monitoring-prometheus.cattle-monitoring-system.svc` | +| `prometheus-adapter.prometheus.port` | The port on the Prometheus deployment that Prometheus Adapter can make requests to | `9090` | +| `prometheus.prometheusSpec.ignoreNamespaceSelectors` | Ignore NamespaceSelector settings from the PodMonitor and ServiceMonitor configs. If true, PodMonitors and ServiceMonitors can only discover Pods and Services within the namespace they are deployed into | `false` | + +The following values are enabled for different distributions via [rancher-pushprox](https://github.com/rancher/dev-charts/tree/master/packages/rancher-pushprox). See the rancher-pushprox `README.md` for more information on what all values can be configured for the PushProxy chart. + +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `rkeControllerManager.enabled` | Create a PushProx installation for monitoring kube-controller-manager metrics in RKE clusters | `false` | +| `rkeScheduler.enabled` | Create a PushProx installation for monitoring kube-scheduler metrics in RKE clusters | `false` | +| `rkeProxy.enabled` | Create a PushProx installation for monitoring kube-proxy metrics in RKE clusters | `false` | +| `rkeIngressNginx.enabled` | Create a PushProx installation for monitoring ingress-nginx metrics in RKE clusters | `false` | +| `rkeEtcd.enabled` | Create a PushProx installation for monitoring etcd metrics in RKE clusters | `false` | +| `rke2IngressNginx.enabled` | Create a PushProx installation for monitoring ingress-nginx metrics in RKE2 clusters | `false` | +| `k3sServer.enabled` | Create a PushProx installation for monitoring k3s-server metrics (accounts for kube-controller-manager, kube-scheduler, and kube-proxy metrics) in k3s 
clusters | `false` | +| `kubeAdmControllerManager.enabled` | Create a PushProx installation for monitoring kube-controller-manager metrics in kubeAdm clusters | `false` | +| `kubeAdmScheduler.enabled` | Create a PushProx installation for monitoring kube-scheduler metrics in kubeAdm clusters | `false` | +| `kubeAdmProxy.enabled` | Create a PushProx installation for monitoring kube-proxy metrics in kubeAdm clusters | `false` | +| `kubeAdmEtcd.enabled` | Create a PushProx installation for monitoring etcd metrics in kubeAdm clusters | `false` | + + +### Multiple releases + +The same chart can be used to run multiple Prometheus instances in the same cluster if required. To achieve this, it is necessary to run only one instance of prometheus-operator and a pair of alertmanager pods for an HA configuration, while all other components need to be disabled. To disable a dependency during installation, set `kubeStateMetrics.enabled`, `nodeExporter.enabled` and `grafana.enabled` to `false`. + +## Work-Arounds for Known Issues + +### Running on private GKE clusters + +When Google configure the control plane for private clusters, they automatically configure VPC peering between your Kubernetes cluster’s network and a separate Google managed project. In order to restrict what Google are able to access within your cluster, the firewall rules configured restrict access to your Kubernetes pods. This means that in order to use the webhook component with a GKE private cluster, you must configure an additional firewall rule to allow the GKE control plane access to your webhook pod. + +You can read more information on how to add firewall rules for the GKE control plane nodes in the [GKE docs](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules) + +Alternatively, you can disable the hooks by setting `prometheusOperator.admissionWebhooks.enabled=false`. 
+ +## PrometheusRules Admission Webhooks + +With Prometheus Operator version 0.30+, the core Prometheus Operator pod exposes an endpoint that will integrate with the `validatingwebhookconfiguration` Kubernetes feature to prevent malformed rules from being added to the cluster. + +### How the Chart Configures the Hooks + +A validating and mutating webhook configuration requires the endpoint to which the request is sent to use TLS. It is possible to set up custom certificates to do this, but in most cases, a self-signed certificate is enough. The setup of this component requires some more complex orchestration when using helm. The steps are created to be idempotent and to allow turning the feature on and off without running into helm quirks. + +1. A pre-install hook provisions a certificate into the same namespace using a format compatible with provisioning using end user certificates. If the certificate already exists, the hook exits. +2. The prometheus operator pod is configured to use a TLS proxy container, which will load that certificate. +3. Validating and Mutating webhook configurations are created in the cluster, with their failure mode set to Ignore. This allows rules to be created by the same chart at the same time, even though the webhook has not yet been fully set up - it does not have the correct CA field set. +4. A post-install hook reads the CA from the secret created by step 1 and patches the Validating and Mutating webhook configurations. This process will allow a custom CA provisioned by some other process to also be patched into the webhook configurations. The chosen failure policy is also patched into the webhook configurations + +### Alternatives + +It should be possible to use [jetstack/cert-manager](https://github.com/jetstack/cert-manager) if a more complete solution is required, but it has not been tested. 
+ +You can enable automatic self-signed TLS certificate provisioning via cert-manager by setting the `prometheusOperator.admissionWebhooks.certManager.enabled` value to true. + +### Limitations + +Because the operator can only run as a single pod, there is potential for this component failure to cause rule deployment failure. Because this risk is outweighed by the benefit of having validation, the feature is enabled by default. + +## Developing Prometheus Rules and Grafana Dashboards + +This chart's Grafana Dashboards and Prometheus Rules are just a copy from [prometheus-operator/prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) and other sources, synced (with alterations) by scripts in the [hack](hack) folder. In order to introduce any changes you need to first [add them to the original repository](https://github.com/prometheus-operator/kube-prometheus/blob/main/docs/customizations/developing-prometheus-rules-and-grafana-dashboards.md) and then sync them here using those scripts. + +## Further Information + +For more in-depth documentation of the configuration options, please see + +- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) +- [Prometheus](https://prometheus.io/docs/introduction/overview/) +- [Grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana#grafana-helm-chart) + +## prometheus.io/scrape + +The prometheus operator does not support annotation-based discovery of services, using the `PodMonitor` or `ServiceMonitor` CRD in its place as they provide far more configuration options.
+For information on how to use PodMonitors/ServiceMonitors, please see the `prometheus-operator/prometheus-operator` documentation here: + +- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#include-servicemonitors) +- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/getting-started.md#include-podmonitors) +- [Running Exporters](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/running-exporters.md) + +By default, Prometheus discovers PodMonitors and ServiceMonitors within its namespace that are labeled with the same release tag as the prometheus-operator release. +Sometimes, you may need to discover custom PodMonitors/ServiceMonitors, for example to scrape data from third-party applications. +An easy way of doing this, without compromising the default PodMonitors/ServiceMonitors discovery, is allowing Prometheus to discover all PodMonitors/ServiceMonitors within its namespace, without applying label filtering. +To do so, you can set `prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues` and `prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues` to `false`. + +## Migrating from stable/prometheus-operator chart + +## Zero downtime + +Since `kube-prometheus-stack` is fully compatible with the `stable/prometheus-operator` chart, a migration without downtime can be achieved. +However, the old name prefix needs to be kept. If you want the new name please follow the step by step guide below (with downtime). + +You can override the name to achieve this: + +```console +helm upgrade prometheus-operator prometheus-community/kube-prometheus-stack -n monitoring --reuse-values --set nameOverride=prometheus-operator +``` + +**Note**: It is recommended to run this first with `--dry-run --debug`.
+ +## Redeploy with new name (downtime) + +If the **prometheus-operator** values are compatible with the new **kube-prometheus-stack** chart, please follow the below steps for migration: + +> The guide presumes that chart is deployed in `monitoring` namespace and the deployments are running there. If in other namespace, please replace the `monitoring` to the deployed namespace. + +1. Patch the PersistentVolume created/used by the prometheus-operator chart to `Retain` claim policy: + + ```console + kubectl patch pv/<PersistentVolume name> -p '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}' + ``` + + **Note:** To execute the above command, the user must have a cluster wide permission. Please refer to [Kubernetes RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) + +2. Uninstall the **prometheus-operator** release and delete the existing PersistentVolumeClaim, and verify PV becomes Released. + + ```console + helm uninstall prometheus-operator -n monitoring + kubectl delete pvc/<PersistentVolumeClaim name> -n monitoring + ``` + + Additionally, you have to manually remove the remaining `prometheus-operator-kubelet` service. + + ```console + kubectl delete service/prometheus-operator-kubelet -n kube-system + ``` + + You can choose to remove all your existing CRDs (ServiceMonitors, Podmonitors, etc.) if you want to. + +3. Remove current `spec.claimRef` values to change the PV's status from Released to Available. + + ```console + kubectl patch pv/<PersistentVolume name> --type json -p='[{"op": "remove", "path": "/spec/claimRef"}]' -n monitoring + ``` + +**Note:** To execute the above command, the user must have a cluster wide permission. Please refer to [Kubernetes RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) + +After these steps, proceed to a fresh **kube-prometheus-stack** installation and make sure the current release of **kube-prometheus-stack** matches the `volumeClaimTemplate` values in the `values.yaml`.
+ +The binding is done via matching a specific amount of storage requested and with certain access modes. + +For example, if you had storage specified as this with **prometheus-operator**: + +```yaml +volumeClaimTemplate: + spec: + storageClassName: gp2 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi +``` + +You have to specify matching `volumeClaimTemplate` with 50Gi storage and `ReadWriteOnce` access mode. + +Additionally, you should check the current AZ of your legacy installation's PV, and configure the fresh release to use the same AZ as the old one. If the pods are in a different AZ than the PV, the release will fail to bind the existing one, hence creating a new PV. + +This can be achieved either by specifying the labels through `values.yaml`, e.g. setting `prometheus.prometheusSpec.nodeSelector` to: + +```yaml +nodeSelector: + failure-domain.beta.kubernetes.io/zone: east-west-1a +``` + +or passing these values as `--set` overrides during installation. + +The new release should now re-attach your previously released PV with its content. + +## Migrating from coreos/prometheus-operator chart + +The multiple charts have been combined into a single chart that installs prometheus operator, prometheus, alertmanager, grafana as well as the multitude of exporters necessary to monitor a cluster. + +There is no simple and direct migration path between the charts as the changes are extensive and intended to make the chart easier to support. + +The capabilities of the old chart are all available in the new chart, including the ability to run multiple prometheus instances on a single cluster - you will need to disable the parts of the chart you do not wish to deploy. + +You can check out the tickets for this change [here](https://github.com/prometheus-operator/prometheus-operator/issues/592) and [here](https://github.com/helm/charts/pull/6765). 
+ +### High-level overview of Changes + +#### Added dependencies + +The chart has added 3 [dependencies](#dependencies). + +- Node-Exporter, Kube-State-Metrics: These components are loaded as dependencies into the chart, and are relatively simple components +- Grafana: The Grafana chart is more feature-rich than this chart - it contains a sidecar that is able to load data sources and dashboards from configmaps deployed into the same cluster. For more information check out the [documentation for the chart](https://github.com/grafana/helm-charts/blob/main/charts/grafana/README.md) + +#### Kubelet Service + +Because the kubelet service has a new name in the chart, make sure to clean up the old kubelet service in the `kube-system` namespace to prevent counting container metrics twice. + +#### Persistent Volumes + +If you would like to keep the data of the current persistent volumes, it should be possible to attach existing volumes to new PVCs and PVs that are created using the conventions in the new chart. 
For example, in order to use an existing Azure disk for a helm release called `prometheus-migration` the following resources can be created: + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: pvc-prometheus-migration-prometheus-0 +spec: + accessModes: + - ReadWriteOnce + azureDisk: + cachingMode: None + diskName: pvc-prometheus-migration-prometheus-0 + diskURI: /subscriptions/f5125d82-2622-4c50-8d25-3f7ba3e9ac4b/resourceGroups/sample-migration-resource-group/providers/Microsoft.Compute/disks/pvc-prometheus-migration-prometheus-0 + fsType: "" + kind: Managed + readOnly: false + capacity: + storage: 1Gi + persistentVolumeReclaimPolicy: Delete + storageClassName: prometheus + volumeMode: Filesystem +``` + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app.kubernetes.io/name: prometheus + prometheus: prometheus-migration-prometheus + name: prometheus-prometheus-migration-prometheus-db-prometheus-prometheus-migration-prometheus-0 + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: prometheus + volumeMode: Filesystem + volumeName: pvc-prometheus-migration-prometheus-0 +``` + +The PVC will take ownership of the PV and when you create a release using a persistent volume claim template it will use the existing PVCs as they match the naming convention used by the chart. For other cloud providers similar approaches can be used. + +#### KubeProxy + +The metrics bind address of kube-proxy defaults to `127.0.0.1:10249`, which Prometheus instances **cannot** access. You should expose metrics by changing the `metricsBindAddress` field value to `0.0.0.0:10249` if you want to collect them. + +Depending on the cluster, the relevant part of `config.conf` will be in ConfigMap `kube-system/kube-proxy` or `kube-system/kube-proxy-config`.
For example: + +```console +kubectl -n kube-system edit cm kube-proxy +``` + +```yaml +apiVersion: v1 +data: + config.conf: |- + apiVersion: kubeproxy.config.k8s.io/v1alpha1 + kind: KubeProxyConfiguration + # ... + # metricsBindAddress: 127.0.0.1:10249 + metricsBindAddress: 0.0.0.0:10249 + # ... + kubeconfig.conf: |- + # ... +kind: ConfigMap +metadata: + labels: + app: kube-proxy + name: kube-proxy + namespace: kube-system +``` diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/app-README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/app-README.md new file mode 100644 index 0000000000..3920854384 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/app-README.md @@ -0,0 +1,46 @@ +# Rancher Monitoring and Alerting + + This chart is based on the upstream [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) chart. The chart deploys [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) and its CRDs along with [Grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana), [Prometheus Adapter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter) and additional charts / Kubernetes manifests to gather metrics. It allows users to monitor their Kubernetes clusters, view metrics in Grafana dashboards, and set up alerts and notifications. + +For more information on how to use the feature, refer to our [docs](https://rancher.com/docs/rancher/v2.x/en/monitoring-alerting/v2.5/). + +The chart installs the following components: + +- [Prometheus Operator](https://github.com/coreos/prometheus-operator) - The operator provides easy monitoring definitions for Kubernetes services, manages [Prometheus](https://prometheus.io/) and [AlertManager](https://prometheus.io/docs/alerting/latest/alertmanager/) instances, and adds default scrape targets for some Kubernetes components. 
+- [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus/) - A collection of community-curated Kubernetes manifests, Grafana Dashboards, and PrometheusRules that deploy a default end-to-end cluster monitoring configuration. +- [Grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana) - Grafana allows a user to create / view dashboards based on the cluster metrics collected by Prometheus. +- [node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter) / [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) / [rancher-pushprox](https://github.com/rancher/charts/tree/dev-v2.7/packages/rancher-monitoring/rancher-pushprox/charts) - These charts monitor various Kubernetes components across different Kubernetes cluster types. +- [Prometheus Adapter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter) - The adapter allows a user to expose custom metrics, resource metrics, and external metrics on the default [Prometheus](https://prometheus.io/) instance to the Kubernetes API Server. + +For more information, review the Helm README of this chart. + +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`.
+ +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Upgrading from 100.0.0+up16.6.0 to 100.1.0+up19.0.3 + +### Noticeable changes: +Grafana: +- `sidecar.dashboards.searchNamespace`, `sidecar.datasources.searchNamespace` and `sidecar.notifiers.searchNamespace` support a list of namespaces now. + +Kube-state-metrics +- the type of `collectors` is changed from Dictionary to List. +- `kubeStateMetrics.serviceMonitor.namespaceOverride` was replaced by `kube-state-metrics.namespaceOverride`. + +### Known issues: +- Occasionally, the upgrade fails with errors related to the webhook `prometheusrulemutate.monitoring.coreos.com`. This is a known issue in the upstream, and the workaround is to trigger the upgrade one more time.
[32416](https://github.com/rancher/rancher/issues/32416#issuecomment-828881726) diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/.helmignore new file mode 100644 index 0000000000..8cade1318f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.vscode +.project +.idea/ +*.tmproj +OWNERS diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/Chart.yaml new file mode 100644 index 0000000000..ff6bcb26aa --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/Chart.yaml @@ -0,0 +1,39 @@ +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Chart Source + url: https://github.com/grafana/helm-charts + - name: Upstream Project + url: https://github.com/grafana/grafana + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-grafana +apiVersion: v2 +appVersion: 10.4.1 +description: The leading tool for querying and visualizing time series and metrics. 
+home: https://grafana.com +icon: https://artifacthub.io/image/b4fed1a7-6c8f-4945-b99d-096efa3e4116 +keywords: +- monitoring +- metric +kubeVersion: '>=1.26.0-0' +maintainers: +- email: zanhsieh@gmail.com + name: zanhsieh +- email: rluckie@cisco.com + name: rtluckie +- email: maor.friedman@redhat.com + name: maorfr +- email: miroslav.hadzhiev@gmail.com + name: Xtigyro +- email: mail@torstenwalter.de + name: torstenwalter +name: grafana +sources: +- https://github.com/grafana/grafana +- https://github.com/grafana/helm-charts +type: application +version: 7.3.11 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/README.md new file mode 100644 index 0000000000..0ff07f297d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/README.md @@ -0,0 +1,770 @@ +# Grafana Helm Chart + +* Installs the web dashboarding system [Grafana](http://grafana.org/) + +## Get Repo Info + +```console +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update +``` + +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```console +helm install my-release grafana/grafana +``` + +## Uninstalling the Chart + +To uninstall/delete the my-release deployment: + +```console +helm delete my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Upgrading an existing Release to a new major version + +A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an +incompatible breaking change needing manual actions. + +### To 4.0.0 (And 3.12.1) + +This version requires Helm >= 2.12.0. + +### To 5.0.0 + +You have to add --force to your helm upgrade command as the labels of the chart have changed. + +### To 6.0.0 + +This version requires Helm >= 3.1.0. 
+ +### To 7.0.0 + +For consistency with other Helm charts, the `global.image.registry` parameter was renamed +to `global.imageRegistry`. If you were not previously setting `global.image.registry`, no action +is required on upgrade. If you were previously setting `global.image.registry`, you will +need to instead set `global.imageRegistry`. + +## Configuration + +| Parameter | Description | Default | +|-------------------------------------------|-----------------------------------------------|---------------------------------------------------------| +| `replicas` | Number of nodes | `1` | +| `podDisruptionBudget.minAvailable` | Pod disruption minimum available | `nil` | +| `podDisruptionBudget.maxUnavailable` | Pod disruption maximum unavailable | `nil` | +| `podDisruptionBudget.apiVersion` | Pod disruption apiVersion | `nil` | +| `deploymentStrategy` | Deployment strategy | `{ "type": "RollingUpdate" }` | +| `livenessProbe` | Liveness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 } "initialDelaySeconds": 60, "timeoutSeconds": 30, "failureThreshold": 10 }` | +| `readinessProbe` | Readiness Probe settings | `{ "httpGet": { "path": "/api/health", "port": 3000 } }`| +| `securityContext` | Deployment securityContext | `{"runAsUser": 472, "runAsGroup": 472, "fsGroup": 472}` | +| `priorityClassName` | Name of Priority Class to assign pods | `nil` | +| `image.registry` | Image registry | `docker.io` | +| `image.repository` | Image repository | `grafana/grafana` | +| `image.tag` | Overrides the Grafana image tag whose default is the chart appVersion (`Must be >= 5.0.0`) | `` | +| `image.sha` | Image sha (optional) | `` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `image.pullSecrets` | Image pull secrets (can be templated) | `[]` | +| `service.enabled` | Enable grafana service | `true` | +| `service.type` | Kubernetes service type | `ClusterIP` | +| `service.port` | Kubernetes port where service is exposed | `80` | +| 
`service.portName` | Name of the port on the service | `service` | +| `service.appProtocol` | Adds the appProtocol field to the service | `` | +| `service.targetPort` | Internal service port | `3000` | +| `service.nodePort` | Kubernetes service nodePort | `nil` | +| `service.annotations` | Service annotations (can be templated) | `{}` | +| `service.labels` | Custom labels | `{}` | +| `service.clusterIP` | internal cluster service IP | `nil` | +| `service.loadBalancerIP` | IP address to assign to load balancer (if supported) | `nil` | +| `service.loadBalancerSourceRanges` | list of IP CIDRs allowed access to lb (if supported) | `[]` | +| `service.externalIPs` | service external IP addresses | `[]` | +| `service.externalTrafficPolicy` | change the default externalTrafficPolicy | `nil` | +| `headlessService` | Create a headless service | `false` | +| `extraExposePorts` | Additional service ports for sidecar containers| `[]` | +| `hostAliases` | adds rules to the pod's /etc/hosts | `[]` | +| `ingress.enabled` | Enables Ingress | `false` | +| `ingress.annotations` | Ingress annotations (values are templated) | `{}` | +| `ingress.labels` | Custom labels | `{}` | +| `ingress.path` | Ingress accepted path | `/` | +| `ingress.pathType` | Ingress type of path | `Prefix` | +| `ingress.hosts` | Ingress accepted hostnames | `["chart-example.local"]` | +| `ingress.extraPaths` | Ingress extra paths to prepend to every host configuration. Useful when configuring [custom actions with AWS ALB Ingress Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.6/guide/ingress/annotations/#actions). Requires `ingress.hosts` to have one or more host entries. | `[]` | +| `ingress.tls` | Ingress TLS configuration | `[]` | +| `ingress.ingressClassName` | Ingress Class Name.
MAY be required for Kubernetes versions >= 1.18 | `""` | +| `resources` | CPU/Memory resource requests/limits | `{}` | +| `nodeSelector` | Node labels for pod assignment | `{}` | +| `tolerations` | Toleration labels for pod assignment | `[]` | +| `affinity` | Affinity settings for pod assignment | `{}` | +| `extraInitContainers` | Init containers to add to the grafana pod | `{}` | +| `extraContainers` | Sidecar containers to add to the grafana pod | `""` | +| `extraContainerVolumes` | Volumes that can be mounted in sidecar containers | `[]` | +| `extraLabels` | Custom labels for all manifests | `{}` | +| `schedulerName` | Name of the k8s scheduler (other than default) | `nil` | +| `persistence.enabled` | Use persistent volume to store data | `false` | +| `persistence.type` | Type of persistence (`pvc` or `statefulset`) | `pvc` | +| `persistence.size` | Size of persistent volume claim | `10Gi` | +| `persistence.existingClaim` | Use an existing PVC to persist data (can be templated) | `nil` | +| `persistence.storageClassName` | Type of persistent volume claim | `nil` | +| `persistence.accessModes` | Persistence access modes | `[ReadWriteOnce]` | +| `persistence.annotations` | PersistentVolumeClaim annotations | `{}` | +| `persistence.finalizers` | PersistentVolumeClaim finalizers | `[ "kubernetes.io/pvc-protection" ]` | +| `persistence.extraPvcLabels` | Extra labels to apply to a PVC. 
| `{}` | +| `persistence.subPath` | Mount a sub dir of the persistent volume (can be templated) | `nil` | +| `persistence.inMemory.enabled` | If persistence is not enabled, whether to mount the local storage in-memory to improve performance | `false` | +| `persistence.inMemory.sizeLimit` | SizeLimit for the in-memory local storage | `nil` | +| `initChownData.enabled` | If false, don't reset data ownership at startup | `true` | +| `initChownData.image.registry` | init-chown-data container image registry | `docker.io` | +| `initChownData.image.repository` | init-chown-data container image repository | `busybox` | +| `initChownData.image.tag` | init-chown-data container image tag | `1.31.1` | +| `initChownData.image.sha` | init-chown-data container image sha (optional)| `""` | +| `initChownData.image.pullPolicy` | init-chown-data container image pull policy | `IfNotPresent` | +| `initChownData.resources` | init-chown-data pod resource requests & limits | `{}` | +| `schedulerName` | Alternate scheduler name | `nil` | +| `env` | Extra environment variables passed to pods | `{}` | +| `envValueFrom` | Environment variables from alternate sources. See the API docs on [EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvarsource-v1-core) for format details. Can be templated | `{}` | +| `envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `""` | +| `envFromSecrets` | List of Kubernetes secrets (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `[]` | +| `envFromConfigMaps` | List of Kubernetes ConfigMaps (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `[]` | +| `envRenderSecret` | Sensitive environment variables passed to pods and stored as secret.
(passed through [tpl](https://helm.sh/docs/howto/charts_tips_and_tricks/#using-the-tpl-function)) | `{}` | +| `enableServiceLinks` | Inject Kubernetes services as environment variables. | `true` | +| `extraSecretMounts` | Additional grafana server secret mounts | `[]` | +| `extraVolumeMounts` | Additional grafana server volume mounts | `[]` | +| `extraVolumes` | Additional Grafana server volumes | `[]` | +| `automountServiceAccountToken` | Mount the service account token on the grafana pod. Mandatory, if sidecars are enabled | `true` | +| `createConfigmap` | Enable creating the grafana configmap | `true` | +| `extraConfigmapMounts` | Additional grafana server configMap volume mounts (values are templated) | `[]` | +| `extraEmptyDirMounts` | Additional grafana server emptyDir volume mounts | `[]` | +| `plugins` | Plugins to be loaded along with Grafana | `[]` | +| `datasources` | Configure grafana datasources (passed through tpl) | `{}` | +| `alerting` | Configure grafana alerting (passed through tpl) | `{}` | +| `notifiers` | Configure grafana notifiers | `{}` | +| `dashboardProviders` | Configure grafana dashboard providers | `{}` | +| `dashboards` | Dashboards to import | `{}` | +| `dashboardsConfigMaps` | ConfigMaps reference that contains dashboards | `{}` | +| `grafana.ini` | Grafana's primary configuration | `{}` | +| `global.imageRegistry` | Global image pull registry for all images. | `null` | +| `global.imagePullSecrets` | Global image pull secrets (can be templated). Allows either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). | `[]` | +| `ldap.enabled` | Enable LDAP authentication | `false` | +| `ldap.existingSecret` | The name of an existing secret containing the `ldap.toml` file, this must have the key `ldap-toml`.
| `""` | +| `ldap.config` | Grafana's LDAP configuration | `""` | +| `annotations` | Deployment annotations | `{}` | +| `labels` | Deployment labels | `{}` | +| `podAnnotations` | Pod annotations | `{}` | +| `podLabels` | Pod labels | `{}` | +| `podPortName` | Name of the grafana port on the pod | `grafana` | +| `lifecycleHooks` | Lifecycle hooks for postStart and preStop [Example](https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/#define-poststart-and-prestop-handlers) | `{}` | +| `sidecar.image.registry` | Sidecar image registry | `quay.io` | +| `sidecar.image.repository` | Sidecar image repository | `kiwigrid/k8s-sidecar` | +| `sidecar.image.tag` | Sidecar image tag | `1.26.0` | +| `sidecar.image.sha` | Sidecar image sha (optional) | `""` | +| `sidecar.imagePullPolicy` | Sidecar image pull policy | `IfNotPresent` | +| `sidecar.resources` | Sidecar resources | `{}` | +| `sidecar.securityContext` | Sidecar securityContext | `{}` | +| `sidecar.enableUniqueFilenames` | Sets the kiwigrid/k8s-sidecar UNIQUE_FILENAMES environment variable. If set to `true` the sidecar will create unique filenames where duplicate data keys exist between ConfigMaps and/or Secrets within the same or multiple Namespaces. | `false` | +| `sidecar.alerts.enabled` | Enables the cluster wide search for alerts and adds/updates/deletes them in grafana |`false` | +| `sidecar.alerts.label` | Label that config maps with alerts should have to be added | `grafana_alert` | +| `sidecar.alerts.labelValue` | Label value that config maps with alerts should have to be added | `""` | +| `sidecar.alerts.searchNamespace` | Namespaces list. If specified, the sidecar will search for alerts config-maps inside these namespaces. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces. | `nil` | +| `sidecar.alerts.watchMethod` | Method to use to detect ConfigMap changes.
With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. | `WATCH` | +| `sidecar.alerts.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` | +| `sidecar.alerts.reloadURL` | Full url of datasource configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/alerting/reload"` | +| `sidecar.alerts.skipReload` | Enabling this omits defining the REQ_URL and REQ_METHOD environment variables | `false` | +| `sidecar.alerts.initAlerts` | Set to true to deploy the alerts sidecar as an initContainer. This is needed if skipReload is true, to load any alerts defined at startup time. | `false` | +| `sidecar.alerts.extraMounts` | Additional alerts sidecar volume mounts. | `[]` | +| `sidecar.dashboards.enabled` | Enables the cluster wide search for dashboards and adds/updates/deletes them in grafana | `false` | +| `sidecar.dashboards.SCProvider` | Enables creation of sidecar provider | `true` | +| `sidecar.dashboards.provider.name` | Unique name of the grafana provider | `sidecarProvider` | +| `sidecar.dashboards.provider.orgid` | Id of the organisation, to which the dashboards should be added | `1` | +| `sidecar.dashboards.provider.folder` | Logical folder in which grafana groups dashboards | `""` | +| `sidecar.dashboards.provider.disableDelete` | Activate to avoid the deletion of imported dashboards | `false` | +| `sidecar.dashboards.provider.allowUiUpdates` | Allow updating provisioned dashboards from the UI | `false` | +| `sidecar.dashboards.provider.type` | Provider type | `file` | +| `sidecar.dashboards.provider.foldersFromFilesStructure` | Allow Grafana to replicate dashboard structure from filesystem. | `false` | +| `sidecar.dashboards.watchMethod` | Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. 
| `WATCH` | +| `sidecar.skipTlsVerify` | Set to true to skip tls verification for kube api calls | `nil` | +| `sidecar.dashboards.label` | Label that config maps with dashboards should have to be added | `grafana_dashboard` | +| `sidecar.dashboards.labelValue` | Label value that config maps with dashboards should have to be added | `""` | +| `sidecar.dashboards.folder` | Folder in the pod that should hold the collected dashboards (unless `sidecar.dashboards.defaultFolderName` is set). This path will be mounted. | `/tmp/dashboards` | +| `sidecar.dashboards.folderAnnotation` | The annotation the sidecar will look for in configmaps to override the destination folder for files | `nil` | +| `sidecar.dashboards.defaultFolderName` | The default folder name, it will create a subfolder under the `sidecar.dashboards.folder` and put dashboards in there instead | `nil` | +| `sidecar.dashboards.searchNamespace` | Namespaces list. If specified, the sidecar will search for dashboards config-maps inside these namespaces. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces. | `nil` | +| `sidecar.dashboards.script` | Absolute path to shell script to execute after a configmap got reloaded. | `nil` | +| `sidecar.dashboards.reloadURL` | Full url of dashboards configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/dashboards/reload"` | +| `sidecar.dashboards.skipReload` | Enabling this omits defining the REQ_USERNAME, REQ_PASSWORD, REQ_URL and REQ_METHOD environment variables | `false` | +| `sidecar.dashboards.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` | +| `sidecar.dashboards.extraMounts` | Additional dashboard sidecar volume mounts. 
| `[]` | +| `sidecar.datasources.enabled` | Enables the cluster wide search for datasources and adds/updates/deletes them in grafana |`false` | +| `sidecar.datasources.label` | Label that config maps with datasources should have to be added | `grafana_datasource` | +| `sidecar.datasources.labelValue` | Label value that config maps with datasources should have to be added | `""` | +| `sidecar.datasources.searchNamespace` | Namespaces list. If specified, the sidecar will search for datasources config-maps inside these namespaces. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces. | `nil` | +| `sidecar.datasources.watchMethod` | Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. | `WATCH` | +| `sidecar.datasources.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` | +| `sidecar.datasources.reloadURL` | Full url of datasource configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/datasources/reload"` | +| `sidecar.datasources.skipReload` | Enabling this omits defining the REQ_URL and REQ_METHOD environment variables | `false` | +| `sidecar.datasources.initDatasources` | Set to true to deploy the datasource sidecar as an initContainer in addition to a container. This is needed if skipReload is true, to load any datasources defined at startup time. | `false` | +| `sidecar.notifiers.enabled` | Enables the cluster wide search for notifiers and adds/updates/deletes them in grafana | `false` | +| `sidecar.notifiers.label` | Label that config maps with notifiers should have to be added | `grafana_notifier` | +| `sidecar.notifiers.labelValue` | Label value that config maps with notifiers should have to be added | `""` | +| `sidecar.notifiers.searchNamespace` | Namespaces list. 
If specified, the sidecar will search for notifiers config-maps (or secrets) inside these namespaces. Otherwise the namespace in which the sidecar is running will be used. It's also possible to specify ALL to search in all namespaces. | `nil` | +| `sidecar.notifiers.watchMethod` | Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. | `WATCH` | +| `sidecar.notifiers.resource` | Should the sidecar looks into secrets, configmaps or both. | `both` | +| `sidecar.notifiers.reloadURL` | Full url of notifier configuration reload API endpoint, to invoke after a config-map change | `"http://localhost:3000/api/admin/provisioning/notifications/reload"` | +| `sidecar.notifiers.skipReload` | Enabling this omits defining the REQ_URL and REQ_METHOD environment variables | `false` | +| `sidecar.notifiers.initNotifiers` | Set to true to deploy the notifier sidecar as an initContainer in addition to a container. This is needed if skipReload is true, to load any notifiers defined at startup time. | `false` | +| `smtp.existingSecret` | The name of an existing secret containing the SMTP credentials. | `""` | +| `smtp.userKey` | The key in the existing SMTP secret containing the username. | `"user"` | +| `smtp.passwordKey` | The key in the existing SMTP secret containing the password. | `"password"` | +| `admin.existingSecret` | The name of an existing secret containing the admin credentials (can be templated). | `""` | +| `admin.userKey` | The key in the existing admin secret containing the username. | `"admin-user"` | +| `admin.passwordKey` | The key in the existing admin secret containing the password. 
| `"admin-password"` | +| `serviceAccount.automountServiceAccountToken` | Automount the service account token on all pods where this service account is used | `false` | +| `serviceAccount.annotations` | ServiceAccount annotations | | +| `serviceAccount.create` | Create service account | `true` | +| `serviceAccount.labels` | ServiceAccount labels | `{}` | +| `serviceAccount.name` | Service account name to use, when empty will be set to created account if `serviceAccount.create` is set else to `default` | `` | +| `serviceAccount.nameTest` | Service account name to use for test, when empty will be set to created account if `serviceAccount.create` is set else to `default` | `nil` | +| `rbac.create` | Create and use RBAC resources | `true` | +| `rbac.namespaced` | Creates Role and RoleBinding instead of the default ClusterRole and ClusterRoleBindings for the grafana instance | `false` | +| `rbac.useExistingRole` | Set to a rolename to use existing role - skipping role creating - but still doing serviceaccount and rolebinding to the rolename set here. | `nil` | +| `rbac.pspEnabled` | Create PodSecurityPolicy (with `rbac.create`, grant roles permissions as well) | `false` | +| `rbac.pspUseAppArmor` | Enforce AppArmor in created PodSecurityPolicy (requires `rbac.pspEnabled`) | `false` | +| `rbac.extraRoleRules` | Additional rules to add to the Role | `[]` | +| `rbac.extraClusterRoleRules` | Additional rules to add to the ClusterRole | `[]` | +| `command` | Define command to be executed by grafana container at startup | `nil` | +| `args` | Define additional args if command is used | `nil` | +| `testFramework.enabled` | Whether to create test-related resources | `true` | +| `testFramework.image.registry` | `test-framework` image registry. | `docker.io` | +| `testFramework.image.repository` | `test-framework` image repository. | `bats/bats` | +| `testFramework.image.tag` | `test-framework` image tag.
| `v1.4.1` | +| `testFramework.imagePullPolicy` | `test-framework` image pull policy. | `IfNotPresent` | +| `testFramework.securityContext` | `test-framework` securityContext | `{}` | +| `downloadDashboards.env` | Environment variables to be passed to the `download-dashboards` container | `{}` | +| `downloadDashboards.envFromSecret` | Name of a Kubernetes secret (must be manually created in the same namespace) containing values to be added to the environment. Can be templated | `""` | +| `downloadDashboards.resources` | Resources of `download-dashboards` container | `{}` | +| `downloadDashboardsImage.registry` | Curl docker image registry | `docker.io` | +| `downloadDashboardsImage.repository` | Curl docker image repository | `curlimages/curl` | +| `downloadDashboardsImage.tag` | Curl docker image tag | `7.73.0` | +| `downloadDashboardsImage.sha` | Curl docker image sha (optional) | `""` | +| `downloadDashboardsImage.pullPolicy` | Curl docker image pull policy | `IfNotPresent` | +| `namespaceOverride` | Override the deployment namespace | `""` (`Release.Namespace`) | +| `serviceMonitor.enabled` | Use servicemonitor from prometheus operator | `false` | +| `serviceMonitor.namespace` | Namespace this servicemonitor is installed in | | +| `serviceMonitor.interval` | How frequently Prometheus should scrape | `1m` | +| `serviceMonitor.path` | Path to scrape | `/metrics` | +| `serviceMonitor.scheme` | Scheme to use for metrics scraping | `http` | +| `serviceMonitor.tlsConfig` | TLS configuration block for the endpoint | `{}` | +| `serviceMonitor.labels` | Labels for the servicemonitor passed to Prometheus Operator | `{}` | +| `serviceMonitor.scrapeTimeout` | Timeout after which the scrape is ended | `30s` | +| `serviceMonitor.relabelings` | RelabelConfigs to apply to samples before scraping. | `[]` | +| `serviceMonitor.metricRelabelings` | MetricRelabelConfigs to apply to samples before ingestion. 
| `[]` | +| `revisionHistoryLimit` | Number of old ReplicaSets to retain | `10` | +| `imageRenderer.enabled` | Enable the image-renderer deployment & service | `false` | +| `imageRenderer.image.registry` | image-renderer Image registry | `docker.io` | +| `imageRenderer.image.repository` | image-renderer Image repository | `grafana/grafana-image-renderer` | +| `imageRenderer.image.tag` | image-renderer Image tag | `latest` | +| `imageRenderer.image.sha` | image-renderer Image sha (optional) | `""` | +| `imageRenderer.image.pullPolicy` | image-renderer ImagePullPolicy | `Always` | +| `imageRenderer.env` | extra env-vars for image-renderer | `{}` | +| `imageRenderer.envValueFrom` | Environment variables for image-renderer from alternate sources. See the API docs on [EnvVarSource](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvarsource-v1-core) for format details. Can be templated | `{}` | +| `imageRenderer.serviceAccountName` | image-renderer deployment serviceAccountName | `""` | +| `imageRenderer.securityContext` | image-renderer deployment securityContext | `{}` | +| `imageRenderer.podAnnotations ` | image-renderer image-renderer pod annotation | `{}` | +| `imageRenderer.hostAliases` | image-renderer deployment Host Aliases | `[]` | +| `imageRenderer.priorityClassName` | image-renderer deployment priority class | `''` | +| `imageRenderer.service.enabled` | Enable the image-renderer service | `true` | +| `imageRenderer.service.portName` | image-renderer service port name | `http` | +| `imageRenderer.service.port` | image-renderer port used by deployment | `8081` | +| `imageRenderer.service.targetPort` | image-renderer service port used by service | `8081` | +| `imageRenderer.appProtocol` | Adds the appProtocol field to the service | `` | +| `imageRenderer.grafanaSubPath` | Grafana sub path to use for image renderer callback url | `''` | +| `imageRenderer.podPortName` | name of the image-renderer port on the pod | `http` | +| 
`imageRenderer.revisionHistoryLimit` | number of image-renderer replica sets to keep | `10` | +| `imageRenderer.networkPolicy.limitIngress` | Enable a NetworkPolicy to limit inbound traffic from only the created grafana pods | `true` | +| `imageRenderer.networkPolicy.limitEgress` | Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods | `false` | +| `imageRenderer.resources` | Set resource limits for image-renderer pods | `{}` | +| `imageRenderer.nodeSelector` | Node labels for pod assignment | `{}` | +| `imageRenderer.tolerations` | Toleration labels for pod assignment | `[]` | +| `imageRenderer.affinity` | Affinity settings for pod assignment | `{}` | +| `networkPolicy.enabled` | Enable creation of NetworkPolicy resources. | `false` | +| `networkPolicy.allowExternal` | Don't require client label for connections | `true` | +| `networkPolicy.explicitNamespacesSelector` | A Kubernetes LabelSelector to explicitly select namespaces from which traffic could be allowed | `{}` | +| `networkPolicy.ingress` | Enable the creation of an ingress network policy | `true` | +| `networkPolicy.egress.enabled` | Enable the creation of an egress network policy | `false` | +| `networkPolicy.egress.ports` | An array of ports to allow for the egress | `[]` | +| `enableKubeBackwardCompatibility` | Enable backward compatibility with Kubernetes versions below 1.13, where the pod definition doesn't have the enableServiceLinks option | `false` | + +### Example ingress with path + +With grafana 6.3 and above + +```yaml +grafana.ini: + server: + domain: monitoring.example.com + root_url: "%(protocol)s://%(domain)s/grafana" + serve_from_sub_path: true +ingress: + enabled: true + hosts: + - "monitoring.example.com" + path: "/grafana" +``` + +### Example of extraVolumeMounts and extraVolumes + +Configure additional volumes with `extraVolumes` and volume mounts with `extraVolumeMounts`.
+ +Example for `extraVolumeMounts` and corresponding `extraVolumes`: + +```yaml +extraVolumeMounts: + - name: plugins + mountPath: /var/lib/grafana/plugins + subPath: configs/grafana/plugins + readOnly: false + - name: dashboards + mountPath: /var/lib/grafana/dashboards + hostPath: /usr/shared/grafana/dashboards + readOnly: false + +extraVolumes: + - name: plugins + existingClaim: existing-grafana-claim + - name: dashboards + hostPath: /usr/shared/grafana/dashboards +``` + +Volumes default to `emptyDir`. Set to `persistentVolumeClaim`, +`hostPath`, `csi`, or `configMap` for other types. For a +`persistentVolumeClaim`, specify an existing claim name with +`existingClaim`. + +## Import dashboards + +There are a few methods to import dashboards to Grafana. Below are some examples and explanations as to how to use each method: + +```yaml +dashboards: + default: + some-dashboard: + json: | + { + "annotations": + + ... + # Complete json file here + ... + + "title": "Some Dashboard", + "uid": "abcd1234", + "version": 1 + } + custom-dashboard: + # This is a path to a file inside the dashboards directory inside the chart directory + file: dashboards/custom-dashboard.json + prometheus-stats: + # Ref: https://grafana.com/dashboards/2 + gnetId: 2 + revision: 2 + datasource: Prometheus + loki-dashboard-quick-search: + gnetId: 12019 + revision: 2 + datasource: + - name: DS_PROMETHEUS + value: Prometheus + - name: DS_LOKI + value: Loki + local-dashboard: + url: https://raw.githubusercontent.com/user/repository/master/dashboards/dashboard.json +``` + +## BASE64 dashboards + +Dashboards could be stored on a server that does not return JSON directly and instead of it returns a Base64 encoded file (e.g. Gerrit) +A new parameter has been added to the url use case so if you specify a b64content value equals to true after the url entry a Base64 decoding is applied before save the file to disk. 
+If this entry is not set or is equal to false, no decoding is applied to the file before saving it to disk. + +### Gerrit use case + +Gerrit API for download files has the following schema: `https://yourgerritserver/a/{project-name}/branches/{branch-id}/files/{file-id}/content` where {project-name} and +{file-id} usually have '/' in their values and so they MUST be replaced by %2F so if project-name is user/repo, branch-id is master and file-id is equal to dir1/dir2/dashboard +the url value is `https://yourgerritserver/a/user%2Frepo/branches/master/files/dir1%2Fdir2%2Fdashboard/content` + +## Sidecar for dashboards + +If the parameter `sidecar.dashboards.enabled` is set, a sidecar container is deployed in the grafana +pod. This container watches all configmaps (or secrets) in the cluster and filters out the ones with +a label as defined in `sidecar.dashboards.label`. The files defined in those configmaps are written +to a folder and accessed by grafana. Changes to the configmaps are monitored and the imported +dashboards are deleted/updated. + +A recommendation is to use one configmap per dashboard, as a reduction of multiple dashboards inside +one configmap is currently not properly mirrored in grafana. + +Example dashboard config: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-grafana-dashboard + labels: + grafana_dashboard: "1" +data: + k8s-dashboard.json: |- + [...] +``` + +## Sidecar for datasources + +If the parameter `sidecar.datasources.enabled` is set, an init container is deployed in the grafana +pod. This container lists all secrets (or configmaps, though not recommended) in the cluster and +filters out the ones with a label as defined in `sidecar.datasources.label`. The files defined in +those secrets are written to a folder and accessed by grafana on startup. Using these yaml files, +the data sources in grafana can be imported. + +Should you aim for reloading datasources in Grafana each time the config is changed, set `sidecar.datasources.skipReload: false` and adjust `sidecar.datasources.reloadURL` to `http://{svc-name}.{namespace}.svc.cluster.local/api/admin/provisioning/datasources/reload`.
+ +Secrets are recommended over configmaps for this usecase because datasources usually contain private +data like usernames and passwords. Secrets are the more appropriate cluster resource to manage those. + +Example values to add a postgres datasource as a kubernetes secret: +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: grafana-datasources + labels: + grafana_datasource: 'true' # default value for: sidecar.datasources.label +stringData: + pg-db.yaml: |- + apiVersion: 1 + datasources: + - name: My pg db datasource + type: postgres + url: my-postgresql-db:5432 + user: db-readonly-user + secureJsonData: + password: 'SUperSEcretPa$$word' + jsonData: + database: my_datase + sslmode: 'disable' # disable/require/verify-ca/verify-full + maxOpenConns: 0 # Grafana v5.4+ + maxIdleConns: 2 # Grafana v5.4+ + connMaxLifetime: 14400 # Grafana v5.4+ + postgresVersion: 1000 # 903=9.3, 904=9.4, 905=9.5, 906=9.6, 1000=10 + timescaledb: false + # allow users to edit datasources from the UI. + editable: false +``` + +Example values to add a datasource adapted from [Grafana](http://docs.grafana.org/administration/provisioning/#example-datasource-config-file): + +```yaml +datasources: + datasources.yaml: + apiVersion: 1 + datasources: + # name of the datasource. Required + - name: Graphite + # datasource type. Required + type: graphite + # access mode. proxy or direct (Server or Browser in the UI). Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://localhost:8080 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: + # basic auth username + basicAuthUser: + # basic auth password + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. 
Max one per org + isDefault: + # fields that will be converted to json and stored in json_data + jsonData: + graphiteVersion: "1.1" + tlsAuth: true + tlsAuthWithCACert: true + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. + editable: false +``` + +## Sidecar for notifiers + +If the parameter `sidecar.notifiers.enabled` is set, an init container is deployed in the grafana +pod. This container lists all secrets (or configmaps, though not recommended) in the cluster and +filters out the ones with a label as defined in `sidecar.notifiers.label`. The files defined in +those secrets are written to a folder and accessed by grafana on startup. Using these yaml files, +the notification channels in grafana can be imported. The secrets must be created before +`helm install` so that the notifiers init container can list the secrets. + +Secrets are recommended over configmaps for this usecase because alert notification channels usually contain +private data like SMTP usernames and passwords. Secrets are the more appropriate cluster resource to manage those. + +Example datasource config adapted from [Grafana](https://grafana.com/docs/grafana/latest/administration/provisioning/#alert-notification-channels): + +```yaml +notifiers: + - name: notification-channel-1 + type: slack + uid: notifier1 + # either + org_id: 2 + # or + org_name: Main Org. + is_default: true + send_reminder: true + frequency: 1h + disable_resolve_message: false + # See `Supported Settings` section for settings supporter for each + # alert notification type. 
+ settings: + recipient: 'XXX' + token: 'xoxb' + uploadImage: true + url: https://slack.com + +delete_notifiers: + - name: notification-channel-1 + uid: notifier1 + org_id: 2 + - name: notification-channel-2 + # default org_id: 1 +``` + +## Sidecar for alerting resources + +If the parameter `sidecar.alerts.enabled` is set, a sidecar container is deployed in the grafana +pod. This container watches all configmaps (or secrets) in the cluster (namespace defined by `sidecar.alerts.searchNamespace`) and filters out the ones with +a label as defined in `sidecar.alerts.label` (default is `grafana_alert`). The files defined in those configmaps are written +to a folder and accessed by grafana. Changes to the configmaps are monitored and the imported alerting resources are updated; however, deletions are a little more complicated (see below). + +This sidecar can be used to provision alert rules, contact points, notification policies, notification templates and mute timings as shown in [Grafana Documentation](https://grafana.com/docs/grafana/next/alerting/set-up/provision-alerting-resources/file-provisioning/). + +To fetch the alert config which will be provisioned, use the alert provisioning API ([Grafana Documentation](https://grafana.com/docs/grafana/next/developers/http_api/alerting_provisioning/)). +You can use either JSON or YAML format. + +Example config for an alert rule: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-grafana-alert + labels: + grafana_alert: "1" +data: + k8s-alert.yml: |- + apiVersion: 1 + groups: + - orgId: 1 + name: k8s-alert + [...] +``` + +Deleting provisioned alert rules is a two-step process: you need to delete the configmap which defined the alert rule +and then create a configuration which deletes the alert rule.
+ +Example deletion configuration: +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: delete-sample-grafana-alert + namespace: monitoring + labels: + grafana_alert: "1" +data: + delete-k8s-alert.yml: |- + apiVersion: 1 + deleteRules: + - orgId: 1 + uid: 16624780-6564-45dc-825c-8bded4ad92d3 +``` + +## Statically provision alerting resources +If you don't need to change alerting resources (alert rules, contact points, notification policies and notification templates) regularly you could use the `alerting` config option instead of the sidecar option above. +This will grab the alerting config and apply it statically at build time for the helm file. + +There are two methods to statically provision alerting configuration in Grafana. Below are some examples and explanations as to how to use each method: + +```yaml +alerting: + team1-alert-rules.yaml: + file: alerting/team1/rules.yaml + team2-alert-rules.yaml: + file: alerting/team2/rules.yaml + team3-alert-rules.yaml: + file: alerting/team3/rules.yaml + notification-policies.yaml: + file: alerting/shared/notification-policies.yaml + notification-templates.yaml: + file: alerting/shared/notification-templates.yaml + contactpoints.yaml: + apiVersion: 1 + contactPoints: + - orgId: 1 + name: Slack channel + receivers: + - uid: default-receiver + type: slack + settings: + # Webhook URL to be filled in + url: "" + # We need to escape double curly braces for the tpl function. + text: '{{ `{{ template "default.message" . }}` }}' + title: '{{ `{{ template "default.title" . }}` }}' +``` + +The two possibilities for static alerting resource provisioning are: + +* Inlining the file contents as shown for contact points in the above example. +* Importing a file using a relative path starting from the chart root directory as shown for the alert rules in the above example. 
+ +### Important notes on file provisioning + +* The format of the files is defined in the [Grafana documentation](https://grafana.com/docs/grafana/next/alerting/set-up/provision-alerting-resources/file-provisioning/) on file provisioning. +* The chart supports importing YAML and JSON files. +* The filename must be unique, otherwise one volume mount will overwrite the other. +* In case of inlining, double curly braces that arise from the Grafana configuration format and are not intended as templates for the chart must be escaped. +* The number of total files under `alerting:` is not limited. Each file will end up as a volume mount in the corresponding provisioning folder of the deployed Grafana instance. +* The file size for each import is limited by what the function `.Files.Get` can handle, which suffices for most cases. + +## How to serve Grafana with a path prefix (/grafana) + +In order to serve Grafana with a prefix (e.g., `http://example.com/grafana`), add the following to your values.yaml. + +```yaml +ingress: + enabled: true + annotations: + kubernetes.io/ingress.class: "nginx" + nginx.ingress.kubernetes.io/rewrite-target: /$1 + nginx.ingress.kubernetes.io/use-regex: "true" + + path: /grafana/?(.*) + hosts: + - k8s.example.dev + +grafana.ini: + server: + root_url: http://localhost:3000/grafana # this host can be localhost +``` + +## How to securely reference secrets in grafana.ini + +This example uses Grafana [file providers](https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider) for secret values and the `extraSecretMounts` configuration flag (Additional grafana server secret mounts) to mount the secrets.
+ +In grafana.ini: + +```yaml +grafana.ini: + [auth.generic_oauth] + enabled = true + client_id = $__file{/etc/secrets/auth_generic_oauth/client_id} + client_secret = $__file{/etc/secrets/auth_generic_oauth/client_secret} +``` + +Existing secret, or created along with helm: + +```yaml +--- +apiVersion: v1 +kind: Secret +metadata: + name: auth-generic-oauth-secret +type: Opaque +stringData: + client_id: + client_secret: +``` + +Include in the `extraSecretMounts` configuration flag: + +```yaml +- extraSecretMounts: + - name: auth-generic-oauth-secret-mount + secretName: auth-generic-oauth-secret + defaultMode: 0440 + mountPath: /etc/secrets/auth_generic_oauth + readOnly: true +``` + +### extraSecretMounts using a Container Storage Interface (CSI) provider + +This example uses a CSI driver e.g. retrieving secrets using [Azure Key Vault Provider](https://github.com/Azure/secrets-store-csi-driver-provider-azure) + +```yaml +- extraSecretMounts: + - name: secrets-store-inline + mountPath: /run/secrets + readOnly: true + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: "my-provider" + nodePublishSecretRef: + name: akv-creds +``` + +## Image Renderer Plug-In + +This chart supports enabling [remote image rendering](https://github.com/grafana/grafana-image-renderer/blob/master/README.md#run-in-docker) + +```yaml +imageRenderer: + enabled: true +``` + +### Image Renderer NetworkPolicy + +By default the image-renderer pods will have a network policy which only allows ingress traffic from the created grafana instance + +### High Availability for unified alerting + +If you want to run Grafana in a high availability cluster you need to enable +the headless service by setting `headlessService: true` in your `values.yaml` +file. + +As next step you have to setup the `grafana.ini` in your `values.yaml` in a way +that it will make use of the headless service to obtain all the IPs of the +cluster. 
You should replace ``{{ Name }}`` with the name of your helm deployment. + +```yaml +grafana.ini: + ... + unified_alerting: + enabled: true + ha_peers: {{ Name }}-headless:9094 + ha_listen_address: ${POD_IP}:9094 + ha_advertise_address: ${POD_IP}:9094 + + alerting: + enabled: false +``` diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/dashboards/custom-dashboard.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/dashboards/custom-dashboard.json new file mode 100644 index 0000000000..9e26dfeeb6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/dashboards/custom-dashboard.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/NOTES.txt b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/NOTES.txt new file mode 100644 index 0000000000..d86419fe23 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/NOTES.txt @@ -0,0 +1,55 @@ +1. Get your '{{ .Values.adminUser }}' user password by running: + + kubectl get secret --namespace {{ include "grafana.namespace" . }} {{ .Values.admin.existingSecret | default (include "grafana.fullname" .) }} -o jsonpath="{.data.{{ .Values.admin.passwordKey | default "admin-password" }}}" | base64 --decode ; echo + + +2. The Grafana server can be accessed via port {{ .Values.service.port }} on the following DNS name from within your cluster: + + {{ include "grafana.fullname" . }}.{{ include "grafana.namespace" . }}.svc.cluster.local +{{ if .Values.ingress.enabled }} + If you bind grafana to 80, please update values in values.yaml and reinstall: + ``` + securityContext: + runAsUser: 0 + runAsGroup: 0 + fsGroup: 0 + + command: + - "setcap" + - "'cap_net_bind_service=+ep'" + - "/usr/sbin/grafana-server &&" + - "sh" + - "/run.sh" + ``` + Details refer to https://grafana.com/docs/installation/configuration/#http-port. 
+ Or grafana would always crash. + + From outside the cluster, the server URL(s) are: + {{- range .Values.ingress.hosts }} + http://{{ . }} + {{- end }} +{{- else }} + Get the Grafana URL to visit by running these commands in the same shell: + {{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ include "grafana.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "grafana.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ include "grafana.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT + {{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc --namespace {{ include "grafana.namespace" . }} -w {{ include "grafana.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ include "grafana.namespace" . }} {{ include "grafana.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + http://$SERVICE_IP:{{ .Values.service.port -}} + {{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ include "grafana.namespace" . }} -l "app.kubernetes.io/name={{ include "grafana.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace {{ include "grafana.namespace" . }} port-forward $POD_NAME 3000 + {{- end }} +{{- end }} + +3. Login with the password from step 1 and the username: {{ .Values.adminUser }} + +{{- if not .Values.persistence.enabled }} +################################################################################# +###### WARNING: Persistence is disabled!!! You will lose your data when ##### +###### the Grafana pod is terminated. 
##### +################################################################################# +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_config.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_config.tpl new file mode 100644 index 0000000000..19df19cd2a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_config.tpl @@ -0,0 +1,171 @@ +{{/* + Generate config map data: plugins, grafana.ini, the datasources/notifiers/alerting entries that are not stored in a secret, dashboard providers and (when dashboards are set) download_dashboards.sh + */}} +{{- define "grafana.configData" -}} +{{ include "grafana.assertNoLeakedSecrets" . }} +{{- $files := .Files }} +{{- $root := . -}} +{{- with .Values.plugins }} +plugins: {{ join "," . }} +{{- end }} +grafana.ini: | +{{- range $elem, $elemVal := index .Values "grafana.ini" }} + {{- if not (kindIs "map" $elemVal) }} + {{- if kindIs "invalid" $elemVal }} + {{ $elem }} = + {{- else if kindIs "string" $elemVal }} + {{ $elem }} = {{ tpl $elemVal $ }} + {{- else }} + {{ $elem }} = {{ $elemVal }} + {{- end }} + {{- end }} +{{- end }} +{{- range $key, $value := index .Values "grafana.ini" }} + {{- if kindIs "map" $value }} + [{{ $key }}] + {{- range $elem, $elemVal := $value }} + {{- if kindIs "invalid" $elemVal }} + {{ $elem }} = + {{- else if kindIs "string" $elemVal }} + {{ $elem }} = {{ tpl $elemVal $ }} + {{- else }} + {{ $elem }} = {{ $elemVal }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} + +{{- range $key, $value := .Values.datasources }} +{{- if not (hasKey $value "secret") }} +{{ $key }}: | + {{- tpl (toYaml $value | nindent 2) $root }} +{{- end }} +{{- end }} + +{{- range $key, $value := .Values.notifiers }} +{{- if not (hasKey $value "secret") }} +{{ $key }}: | + {{- toYaml $value | nindent 2 }} +{{- end }} +{{- end }} + +{{- range $key, $value := .Values.alerting }} +{{- if (hasKey $value "file") }} +{{ $key }}: +{{- toYaml ( $files.Get $value.file ) | nindent 2 }} +{{- else if (or (hasKey $value "secret") (hasKey $value "secretFile"))}} +{{/* will be stored inside
secret generated by "configSecret.yaml"*/}} +{{- else }} +{{ $key }}: | + {{- tpl (toYaml $value | nindent 2) $root }} +{{- end }} +{{- end }} + +{{- range $key, $value := .Values.dashboardProviders }} +{{ $key }}: | + {{- toYaml $value | nindent 2 }} +{{- end }} + +{{- if .Values.dashboards }} +download_dashboards.sh: | + #!/usr/bin/env sh + set -euf + {{- if .Values.dashboardProviders }} + {{- range $key, $value := .Values.dashboardProviders }} + {{- range $value.providers }} + mkdir -p {{ .options.path }} + {{- end }} + {{- end }} + {{- end }} +{{ $dashboardProviders := .Values.dashboardProviders }} +{{- range $provider, $dashboards := .Values.dashboards }} + {{- range $key, $value := $dashboards }} + {{- if (or (hasKey $value "gnetId") (hasKey $value "url")) }} + curl -skf \ + --connect-timeout 60 \ + --max-time 60 \ + {{- if not $value.b64content }} + {{- if not $value.acceptHeader }} + -H "Accept: application/json" \ + {{- else }} + -H "Accept: {{ $value.acceptHeader }}" \ + {{- end }} + {{- if $value.token }} + -H "Authorization: token {{ $value.token }}" \ + {{- end }} + {{- if $value.bearerToken }} + -H "Authorization: Bearer {{ $value.bearerToken }}" \ + {{- end }} + {{- if $value.basic }} + -H "Authorization: Basic {{ $value.basic }}" \ + {{- end }} + {{- if $value.gitlabToken }} + -H "PRIVATE-TOKEN: {{ $value.gitlabToken }}" \ + {{- end }} + -H "Content-Type: application/json;charset=UTF-8" \ + {{- end }} + {{- $dpPath := "" -}} + {{- range $kd := (index $dashboardProviders "dashboardproviders.yaml").providers }} + {{- if eq $kd.name $provider }} + {{- $dpPath = $kd.options.path }} + {{- end }} + {{- end }} + {{- if $value.url }} + "{{ $value.url }}" \ + {{- else }} + "https://grafana.com/api/dashboards/{{ $value.gnetId }}/revisions/{{- if $value.revision -}}{{ $value.revision }}{{- else -}}1{{- end -}}/download" \ + {{- end }} + {{- if $value.datasource }} + {{- if kindIs "string" $value.datasource }} + | sed '/-- .* --/!
s/"datasource":.*,/"datasource": "{{ $value.datasource }}",/g' \ + {{- end }} + {{- if kindIs "slice" $value.datasource }} + {{- range $value.datasource }} + | sed '/-- .* --/! s/${{"{"}}{{ .name }}}/{{ .value }}/g' \ + {{- end }} + {{- end }} + {{- end }} + {{- if $value.b64content }} + | base64 -d \ + {{- end }} + > "{{- if $dpPath -}}{{ $dpPath }}{{- else -}}/var/lib/grafana/dashboards/{{ $provider }}{{- end -}}/{{ $key }}.json" + {{ end }} + {{- end }} +{{- end }} +{{- end }} +{{- end -}} + +{{/* + Generate dashboard provider config map data (provider.yaml, built from .Values.sidecar.dashboards) + */}} +{{- define "grafana.configDashboardProviderData" -}} +provider.yaml: |- + apiVersion: 1 + providers: + - name: '{{ .Values.sidecar.dashboards.provider.name }}' + orgId: {{ .Values.sidecar.dashboards.provider.orgid }} + {{- if not .Values.sidecar.dashboards.provider.foldersFromFilesStructure }} + folder: '{{ .Values.sidecar.dashboards.provider.folder }}' + {{- end }} + type: {{ .Values.sidecar.dashboards.provider.type }} + disableDeletion: {{ .Values.sidecar.dashboards.provider.disableDelete }} + allowUiUpdates: {{ .Values.sidecar.dashboards.provider.allowUiUpdates }} + updateIntervalSeconds: {{ .Values.sidecar.dashboards.provider.updateIntervalSeconds | default 30 }} + options: + foldersFromFilesStructure: {{ .Values.sidecar.dashboards.provider.foldersFromFilesStructure }} + path: {{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }} +{{- end -}} + +{{- define "grafana.secretsData" -}} +{{- if and (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) (not .Values.admin.existingSecret) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) }} +admin-user: {{ .Values.adminUser | b64enc | quote }} +{{- if .Values.adminPassword }} +admin-password: {{ .Values.adminPassword | b64enc | quote }} +{{- else }} +admin-password: {{ include "grafana.password" .
}} +{{- end }} +{{- end }} +{{- if not .Values.ldap.existingSecret }} +ldap-toml: {{ tpl .Values.ldap.config $ | b64enc | quote }} +{{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_helpers.tpl new file mode 100644 index 0000000000..68d2d815d8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_helpers.tpl @@ -0,0 +1,305 @@ +# Rancher +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; the "linux-node-tolerations" below let workloads be scheduled onto those Linux nodes. +"linux-node-selector" uses the beta.kubernetes.io/os label on Kubernetes versions below 1.14 and kubernetes.io/os otherwise. +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart (nameOverride takes precedence), truncated to 63 chars. +*/}} +{{- define "grafana.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name.
+*/}} +{{- define "grafana.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. Plus signs are replaced by underscores and the result is truncated to 63 chars. +*/}} +{{- define "grafana.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create the name of the service account: serviceAccount.name when set, else the fullname if serviceAccount.create is enabled, else "default". grafana.serviceAccountNameTest applies the same rule to serviceAccount.nameTest with a "-test" suffix on the fullname. +*/}} +{{- define "grafana.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "grafana.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{- define "grafana.serviceAccountNameTest" -}} +{{- if .Values.serviceAccount.create }} +{{- default (print (include "grafana.fullname" .) "-test") .Values.serviceAccount.nameTest }} +{{- else }} +{{- default "default" .Values.serviceAccount.nameTest }} +{{- end }} +{{- end }} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "grafana.namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "grafana.labels" -}} +helm.sh/chart: {{ include "grafana.chart" . }} +{{ include "grafana.selectorLabels" . }} +{{- if or .Chart.AppVersion .Values.image.tag }} +app.kubernetes.io/version: {{ mustRegexReplaceAllLiteral "@sha.*" .Values.image.tag "" | default .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.extraLabels }} +{{ toYaml .
}} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "grafana.selectorLabels" -}} +app.kubernetes.io/name: {{ include "grafana.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Common labels ImageRenderer +*/}} +{{- define "grafana.imageRenderer.labels" -}} +helm.sh/chart: {{ include "grafana.chart" . }} +{{ include "grafana.imageRenderer.selectorLabels" . }} +{{- if or .Chart.AppVersion .Values.image.tag }} +app.kubernetes.io/version: {{ mustRegexReplaceAllLiteral "@sha.*" .Values.image.tag "" | default .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels ImageRenderer +*/}} +{{- define "grafana.imageRenderer.selectorLabels" -}} +app.kubernetes.io/name: {{ include "grafana.name" . }}-image-renderer +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Reuses the admin-password of an already existing release secret when that key is present. Otherwise it generates +a new random 40-character password. The value is rendered base64-encoded and quoted in both cases. +*/}} +{{- define "grafana.password" -}} +{{- $secret := (lookup "v1" "Secret" (include "grafana.namespace" .) (include "grafana.fullname" .) ) }} +{{- with get (get $secret "data" | default dict) "admin-password" }} +{{- . | quote }} +{{- else }} +{{- (randAlphaNum 40) | b64enc | quote }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for rbac. +*/}} +{{- define "grafana.rbac.apiVersion" -}} +{{- if $.Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }} +{{- print "rbac.authorization.k8s.io/v1" }} +{{- else }} +{{- print "rbac.authorization.k8s.io/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for ingress.
+*/}} +{{- define "grafana.ingress.apiVersion" -}} +{{- if and ($.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) }} +{{- print "networking.k8s.io/v1" }} +{{- else if $.Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" }} +{{- print "networking.k8s.io/v1beta1" }} +{{- else }} +{{- print "extensions/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for Horizontal Pod Autoscaler (autoscaling/v2 when the cluster offers it, else autoscaling/v2beta2). +*/}} +{{- define "grafana.hpa.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "autoscaling/v2" }} +{{- print "autoscaling/v2" }} +{{- else }} +{{- print "autoscaling/v2beta2" }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for podDisruptionBudget (an explicit podDisruptionBudget.apiVersion value wins, then policy/v1 when available, else policy/v1beta1). +*/}} +{{- define "grafana.podDisruptionBudget.apiVersion" -}} +{{- if $.Values.podDisruptionBudget.apiVersion }} +{{- print $.Values.podDisruptionBudget.apiVersion }} +{{- else if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }} +{{- print "policy/v1" }} +{{- else }} +{{- print "policy/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Return if ingress is stable, i.e. renders "true" when grafana.ingress.apiVersion resolves to networking.k8s.io/v1. +*/}} +{{- define "grafana.ingress.isStable" -}} +{{- eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1" }} +{{- end }} + +{{/* +Return if ingress supports ingressClassName (stable API, or networking.k8s.io/v1beta1 on Kubernetes 1.18 or newer). +*/}} +{{- define "grafana.ingress.supportsIngressClassName" -}} +{{- or (eq (include "grafana.ingress.isStable" .) "true") (and (eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) }} +{{- end }} + +{{/* +Return if ingress supports pathType (same condition as grafana.ingress.supportsIngressClassName). +*/}} +{{- define "grafana.ingress.supportsPathType" -}} +{{- or (eq (include "grafana.ingress.isStable" .) "true") (and (eq (include "grafana.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) }} +{{- end }} + +{{/* +Formats imagePullSecrets. Input is (dict "root" .
"imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "grafana.imagePullSecrets" -}} +{{- $root := .root }} +{{- range (concat .root.Values.global.imagePullSecrets .imagePullSecrets) }} +{{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml (dict "name" (tpl .name $root)) | trim }} +{{- else }} +- name: {{ tpl . $root }} +{{- end }} +{{- end }} +{{- end }} + + +{{/* + Checks whether or not the configSecret secret has to be created: renders true when any datasource or notifier entry has a "secret" key, or any alerting entry has a "secret" or "secretFile" key + */}} +{{- define "grafana.shouldCreateConfigSecret" -}} +{{- $secretFound := false -}} +{{- range $key, $value := .Values.datasources }} + {{- if hasKey $value "secret" }} + {{- $secretFound = true}} + {{- end }} +{{- end }} +{{- range $key, $value := .Values.notifiers }} + {{- if hasKey $value "secret" }} + {{- $secretFound = true}} + {{- end }} +{{- end }} +{{- range $key, $value := .Values.alerting }} + {{- if (or (hasKey $value "secret") (hasKey $value "secretFile")) }} + {{- $secretFound = true}} + {{- end }} +{{- end }} +{{- $secretFound}} +{{- end -}} + +{{/* + Checks whether the user is attempting to store secrets in plaintext + in the grafana.ini configmap +*/}} +{{/* grafana.assertNoLeakedSecrets checks for sensitive keys in values: when .Values.assertNoLeakedSecrets is set, rendering fails for any listed grafana.ini key whose value contains no ${...}, $__env{...}, $__file{...} or $__vault{...} expansion */}} +{{- define "grafana.assertNoLeakedSecrets" -}} + {{- $sensitiveKeysYaml := ` +sensitiveKeys: +- path: ["database", "password"] +- path: ["smtp", "password"] +- path: ["security", "secret_key"] +- path: ["security", "admin_password"] +- path: ["auth.basic", "password"] +- path: ["auth.ldap", "bind_password"] +- path: ["auth.google", "client_secret"] +- path: ["auth.github", "client_secret"] +- path: ["auth.gitlab", "client_secret"] +- path: ["auth.generic_oauth", "client_secret"] +- path: ["auth.okta", "client_secret"] +- path: ["auth.azuread", "client_secret"] +- path: ["auth.grafana_com", "client_secret"] +- path: ["auth.grafananet", "client_secret"] +- path: ["azure", "user_identity_client_secret"] +- path: ["unified_alerting", "ha_redis_password"] +- path: ["metrics",
"basic_auth_password"] +- path: ["external_image_storage.s3", "secret_key"] +- path: ["external_image_storage.webdav", "password"] +- path: ["external_image_storage.azure_blob", "account_key"] +` | fromYaml -}} + {{- if $.Values.assertNoLeakedSecrets -}} + {{- $grafanaIni := index .Values "grafana.ini" -}} + {{- range $_, $secret := $sensitiveKeysYaml.sensitiveKeys -}} + {{- $currentMap := $grafanaIni -}} + {{- $shouldContinue := true -}} + {{- range $index, $elem := $secret.path -}} + {{- if and $shouldContinue (hasKey $currentMap $elem) -}} + {{- if eq (len $secret.path) (add1 $index) -}} + {{- if not (regexMatch "\\$(?:__(?:env|file|vault))?{[^}]+}" (index $currentMap $elem)) -}} + {{- fail (printf "Sensitive key '%s' should not be defined explicitly in values. Use variable expansion instead. You can disable this client-side validation by changing the value of assertNoLeakedSecrets." (join "." $secret.path)) -}} + {{- end -}} + {{- else -}} + {{- $currentMap = index $currentMap $elem -}} + {{- end -}} + {{- else -}} + {{- $shouldContinue = false -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_pod.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_pod.tpl new file mode 100644 index 0000000000..2ebf7d5f10 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/_pod.tpl @@ -0,0 +1,1296 @@ +{{- define "grafana.pod" -}} +{{- $sts := list "sts" "StatefulSet" "statefulset" -}} +{{- $root := . -}} +{{- with .Values.schedulerName }} +schedulerName: "{{ . }}" +{{- end }} +serviceAccountName: {{ include "grafana.serviceAccountName" . }} +automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} +{{- with .Values.securityContext }} +securityContext: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.hostAliases }} +hostAliases: + {{- toYaml .
| nindent 2 }} +{{- end }} +{{- if .Values.dnsPolicy }} +dnsPolicy: {{ .Values.dnsPolicy }} +{{- end }} +{{- with .Values.dnsConfig }} +dnsConfig: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.priorityClassName }} +priorityClassName: {{ . }} +{{- end }} +{{- if ( or .Values.persistence.enabled .Values.dashboards .Values.extraInitContainers (and .Values.sidecar.alerts.enabled .Values.sidecar.alerts.initAlerts) (and .Values.sidecar.datasources.enabled .Values.sidecar.datasources.initDatasources) (and .Values.sidecar.notifiers.enabled .Values.sidecar.notifiers.initNotifiers)) }} +initContainers: +{{- end }} +{{- if ( and .Values.persistence.enabled .Values.initChownData.enabled ) }} + - name: init-chown-data + {{- $registry := include "system_default_registry" . | default .Values.initChownData.image.registry -}} + {{- if .Values.initChownData.image.sha }} + image: "{{ $registry }}{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}@sha256:{{ .Values.initChownData.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.initChownData.image.repository }}:{{ .Values.initChownData.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.initChownData.image.pullPolicy }} + {{- with .Values.initChownData.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + command: + - chown + - -R + - {{ .Values.securityContext.runAsUser }}:{{ .Values.securityContext.runAsGroup }} + - /var/lib/grafana + {{- with .Values.initChownData.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: storage + mountPath: "/var/lib/grafana" + {{- with .Values.persistence.subPath }} + subPath: {{ tpl . $root }} + {{- end }} +{{- end }} +{{- if .Values.dashboards }} + - name: download-dashboards + {{- $registry := include "system_default_registry" . 
| default .Values.downloadDashboardsImage.registry -}} + {{- if .Values.downloadDashboardsImage.sha }} + image: "{{ $registry }}{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}@sha256:{{ .Values.downloadDashboardsImage.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.downloadDashboardsImage.repository }}:{{ .Values.downloadDashboardsImage.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.downloadDashboardsImage.pullPolicy }} + command: ["/bin/sh"] + args: [ "-c", "mkdir -p /var/lib/grafana/dashboards/default && /bin/sh -x /etc/grafana/download_dashboards.sh" ] + {{- with .Values.downloadDashboards.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + env: + {{- range $key, $value := .Values.downloadDashboards.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- range $key, $value := .Values.downloadDashboards.envValueFrom }} + - name: {{ $key | quote }} + valueFrom: + {{- tpl (toYaml $value) $ | nindent 10 }} + {{- end }} + {{- with .Values.downloadDashboards.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.downloadDashboards.envFromSecret }} + envFrom: + - secretRef: + name: {{ tpl . $root }} + {{- end }} + volumeMounts: + - name: config + mountPath: "/etc/grafana/download_dashboards.sh" + subPath: download_dashboards.sh + - name: storage + mountPath: "/var/lib/grafana" + {{- with .Values.persistence.subPath }} + subPath: {{ tpl . $root }} + {{- end }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + readOnly: {{ .readOnly }} + {{- end }} +{{- end }} +{{- if and .Values.sidecar.alerts.enabled .Values.sidecar.alerts.initAlerts }} + - name: {{ include "grafana.name" . }}-init-sc-alerts + {{- $registry := include "system_default_registry" . 
| default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.alerts.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.alerts.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: "LIST" + - name: LABEL + value: "{{ .Values.sidecar.alerts.label }}" + {{- with .Values.sidecar.alerts.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/alerting" + - name: RESOURCE + value: {{ quote .Values.sidecar.alerts.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.alerts.searchNamespace }} + - name: NAMESPACE + value: {{ . | join "," | quote }} + {{- end }} + {{- with .Values.sidecar.alerts.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: {{ quote . }} + {{- end }} + {{- with .Values.sidecar.alerts.script }} + - name: SCRIPT + value: {{ quote . }} + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . 
| nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-alerts-volume + mountPath: "/etc/grafana/provisioning/alerting" + {{- with .Values.sidecar.alerts.extraMounts }} + {{- toYaml . | trim | nindent 6 }} + {{- end }} +{{- end }} +{{- if and .Values.sidecar.datasources.enabled .Values.sidecar.datasources.initDatasources }} + - name: {{ include "grafana.name" . }}-init-sc-datasources + {{- $registry := include "system_default_registry" . | default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.datasources.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: "LIST" + - name: LABEL + value: "{{ .Values.sidecar.datasources.label }}" + {{- with .Values.sidecar.datasources.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/datasources" + - name: RESOURCE + value: {{ quote .Values.sidecar.datasources.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . }}" + {{- end }} + {{- if .Values.sidecar.datasources.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (.Values.sidecar.datasources.searchNamespace | join ",") . 
}}" + {{- end }} + {{- with .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-datasources-volume + mountPath: "/etc/grafana/provisioning/datasources" +{{- end }} +{{- if and .Values.sidecar.notifiers.enabled .Values.sidecar.notifiers.initNotifiers }} + - name: {{ include "grafana.name" . }}-init-sc-notifiers + {{- $registry := include "system_default_registry" . | default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.notifiers.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.notifiers.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: LIST + - name: LABEL + value: "{{ .Values.sidecar.notifiers.label }}" + {{- with .Values.sidecar.notifiers.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/notifiers" + - name: RESOURCE + value: {{ quote .Values.sidecar.notifiers.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . 
}}" + {{- end }} + {{- with .Values.sidecar.notifiers.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (. | join ",") $root }}" + {{- end }} + {{- with .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-notifiers-volume + mountPath: "/etc/grafana/provisioning/notifiers" +{{- end}} +{{- with .Values.extraInitContainers }} + {{- tpl (toYaml .) $root | nindent 2 }} +{{- end }} +{{- if or .Values.image.pullSecrets .Values.global.imagePullSecrets }} +imagePullSecrets: + {{- include "grafana.imagePullSecrets" (dict "root" $root "imagePullSecrets" .Values.image.pullSecrets) | nindent 2 }} +{{- end }} +{{- if not .Values.enableKubeBackwardCompatibility }} +enableServiceLinks: {{ .Values.enableServiceLinks }} +{{- end }} +containers: +{{- if and .Values.sidecar.alerts.enabled (not .Values.sidecar.alerts.initAlerts) }} + - name: {{ include "grafana.name" . }}-sc-alerts + {{- $registry := include "system_default_registry" . 
| default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.alerts.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.alerts.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: {{ .Values.sidecar.alerts.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.alerts.label }}" + {{- with .Values.sidecar.alerts.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.alerts.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/alerting" + - name: RESOURCE + value: {{ quote .Values.sidecar.alerts.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.alerts.searchNamespace }} + - name: NAMESPACE + value: {{ . | join "," | quote }} + {{- end }} + {{- with .Values.sidecar.alerts.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: {{ quote . }} + {{- end }} + {{- with .Values.sidecar.alerts.script }} + - name: SCRIPT + value: {{ quote . }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) 
}} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + {{- if not .Values.sidecar.alerts.skipReload }} + - name: REQ_URL + value: {{ .Values.sidecar.alerts.reloadURL }} + - name: REQ_METHOD + value: POST + {{- end }} + {{- if .Values.sidecar.alerts.watchServerTimeout }} + {{- if ne .Values.sidecar.alerts.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.alerts.watchServerTimeout with .Values.sidecar.alerts.watchMethod %s" .Values.sidecar.alerts.watchMethod) }} + {{- end }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.alerts.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.alerts.watchClientTimeout }} + {{- if ne .Values.sidecar.alerts.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.alerts.watchClientTimeout with .Values.sidecar.alerts.watchMethod %s" .Values.sidecar.alerts.watchMethod) }} + {{- end }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.alerts.watchClientTimeout }}" + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-alerts-volume + mountPath: "/etc/grafana/provisioning/alerting" + {{- with .Values.sidecar.alerts.extraMounts }} + {{- toYaml . 
| trim | nindent 6 }} + {{- end }} +{{- end}} +{{- if .Values.sidecar.dashboards.enabled }} + - name: {{ include "grafana.name" . }}-sc-dashboard + {{- $registry := include "system_default_registry" . | default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.dashboards.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- range $key, $value := .Values.sidecar.datasources.envValueFrom }} + - name: {{ $key | quote }} + valueFrom: + {{- tpl (toYaml $value) $ | nindent 10 }} + {{- end }} + {{- if .Values.sidecar.dashboards.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: {{ .Values.sidecar.dashboards.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.dashboards.label }}" + {{- with .Values.sidecar.dashboards.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.dashboards.logLevel }} + {{- end }} + - name: FOLDER + value: "{{ .Values.sidecar.dashboards.folder }}{{- with .Values.sidecar.dashboards.defaultFolderName }}/{{ . }}{{- end }}" + - name: RESOURCE + value: {{ quote .Values.sidecar.dashboards.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.dashboards.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (. 
| join ",") $root }}" + {{- end }} + {{- with .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.dashboards.folderAnnotation }} + - name: FOLDER_ANNOTATION + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.dashboards.script }} + - name: SCRIPT + value: "{{ . }}" + {{- end }} + {{- if not .Values.sidecar.dashboards.skipReload }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) 
}} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + - name: REQ_URL + value: {{ .Values.sidecar.dashboards.reloadURL }} + - name: REQ_METHOD + value: POST + {{- end }} + {{- if .Values.sidecar.dashboards.watchServerTimeout }} + {{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.dashboards.watchServerTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }} + {{- end }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.dashboards.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.dashboards.watchClientTimeout }} + {{- if ne .Values.sidecar.dashboards.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.dashboards.watchClientTimeout with .Values.sidecar.dashboards.watchMethod %s" .Values.sidecar.dashboards.watchMethod) }} + {{- end }} + - name: WATCH_CLIENT_TIMEOUT + value: {{ .Values.sidecar.dashboards.watchClientTimeout | quote }} + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-dashboard-volume + mountPath: {{ .Values.sidecar.dashboards.folder | quote }} + {{- with .Values.sidecar.dashboards.extraMounts }} + {{- toYaml . | trim | nindent 6 }} + {{- end }} +{{- end}} +{{- if and .Values.sidecar.datasources.enabled (not .Values.sidecar.datasources.initDatasources) }} + - name: {{ include "grafana.name" . }}-sc-datasources + {{- $registry := include "system_default_registry" . 
| default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.datasources.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.datasources.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: {{ .Values.sidecar.datasources.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.datasources.label }}" + {{- with .Values.sidecar.datasources.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.datasources.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/datasources" + - name: RESOURCE + value: {{ quote .Values.sidecar.datasources.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.datasources.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (. | join ",") $root }}" + {{- end }} + {{- if .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ .Values.sidecar.skipTlsVerify }}" + {{- end }} + {{- if .Values.sidecar.datasources.script }} + - name: SCRIPT + value: "{{ .Values.sidecar.datasources.script }}" + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) 
| default (include "grafana.fullname" .) }} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + {{- if not .Values.sidecar.datasources.skipReload }} + - name: REQ_URL + value: {{ .Values.sidecar.datasources.reloadURL }} + - name: REQ_METHOD + value: POST + {{- end }} + {{- if .Values.sidecar.datasources.watchServerTimeout }} + {{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.datasources.watchServerTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }} + {{- end }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.datasources.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.datasources.watchClientTimeout }} + {{- if ne .Values.sidecar.datasources.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.datasources.watchClientTimeout with .Values.sidecar.datasources.watchMethod %s" .Values.sidecar.datasources.watchMethod) }} + {{- end }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.datasources.watchClientTimeout }}" + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . 
| nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-datasources-volume + mountPath: "/etc/grafana/provisioning/datasources" +{{- end}} +{{- if .Values.sidecar.notifiers.enabled }} + - name: {{ include "grafana.name" . }}-sc-notifiers + {{- $registry := include "system_default_registry" . | default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.notifiers.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.notifiers.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: {{ .Values.sidecar.notifiers.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.notifiers.label }}" + {{- with .Values.sidecar.notifiers.labelValue }} + - name: LABEL_VALUE + value: {{ quote . }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.notifiers.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/notifiers" + - name: RESOURCE + value: {{ quote .Values.sidecar.notifiers.resource }} + {{- if .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ .Values.sidecar.enableUniqueFilenames }}" + {{- end }} + {{- with .Values.sidecar.notifiers.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (. | join ",") $root }}" + {{- end }} + {{- with .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ . 
}}" + {{- end }} + {{- if .Values.sidecar.notifiers.script }} + - name: SCRIPT + value: "{{ .Values.sidecar.notifiers.script }}" + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + {{- if not .Values.sidecar.notifiers.skipReload }} + - name: REQ_URL + value: {{ .Values.sidecar.notifiers.reloadURL }} + - name: REQ_METHOD + value: POST + {{- end }} + {{- if .Values.sidecar.notifiers.watchServerTimeout }} + {{- if ne .Values.sidecar.notifiers.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.notifiers.watchServerTimeout with .Values.sidecar.notifiers.watchMethod %s" .Values.sidecar.notifiers.watchMethod) }} + {{- end }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.notifiers.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.notifiers.watchClientTimeout }} + {{- if ne .Values.sidecar.notifiers.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.notifiers.watchClientTimeout with .Values.sidecar.notifiers.watchMethod %s" .Values.sidecar.notifiers.watchMethod) }} + {{- end }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.notifiers.watchClientTimeout }}" + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . 
| nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-notifiers-volume + mountPath: "/etc/grafana/provisioning/notifiers" +{{- end}} +{{- if .Values.sidecar.plugins.enabled }} + - name: {{ include "grafana.name" . }}-sc-plugins + {{- $registry := include "system_default_registry" . | default .Values.sidecar.image.registry -}} + {{- if .Values.sidecar.image.sha }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.imagePullPolicy }} + env: + {{- range $key, $value := .Values.sidecar.plugins.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.sidecar.plugins.ignoreAlreadyProcessed }} + - name: IGNORE_ALREADY_PROCESSED + value: "true" + {{- end }} + - name: METHOD + value: {{ .Values.sidecar.plugins.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.plugins.label }}" + {{- if .Values.sidecar.plugins.labelValue }} + - name: LABEL_VALUE + value: {{ quote .Values.sidecar.plugins.labelValue }} + {{- end }} + {{- if or .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }} + - name: LOG_LEVEL + value: {{ default .Values.sidecar.logLevel .Values.sidecar.plugins.logLevel }} + {{- end }} + - name: FOLDER + value: "/etc/grafana/provisioning/plugins" + - name: RESOURCE + value: {{ quote .Values.sidecar.plugins.resource }} + {{- with .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ . 
}}" + {{- end }} + {{- with .Values.sidecar.plugins.searchNamespace }} + - name: NAMESPACE + value: "{{ tpl (. | join ",") $root }}" + {{- end }} + {{- with .Values.sidecar.plugins.script }} + - name: SCRIPT + value: "{{ . }}" + {{- end }} + {{- with .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ . }}" + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + {{- if not .Values.sidecar.plugins.skipReload }} + - name: REQ_URL + value: {{ .Values.sidecar.plugins.reloadURL }} + - name: REQ_METHOD + value: POST + {{- end }} + {{- if .Values.sidecar.plugins.watchServerTimeout }} + {{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.plugins.watchServerTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }} + {{- end }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.plugins.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.plugins.watchClientTimeout }} + {{- if ne .Values.sidecar.plugins.watchMethod "WATCH" }} + {{- fail (printf "Cannot use .Values.sidecar.plugins.watchClientTimeout with .Values.sidecar.plugins.watchMethod %s" .Values.sidecar.plugins.watchMethod) }} + {{- end }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.plugins.watchClientTimeout 
}}" + {{- end }} + {{- with .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.sidecar.securityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: sc-plugins-volume + mountPath: "/etc/grafana/provisioning/plugins" +{{- end}} + - name: {{ .Chart.Name }} + {{- $registry := include "system_default_registry" . | default .Values.image.registry -}} + {{- if .Values.image.sha }} + image: "{{ $registry }}{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.command }} + command: + {{- range .Values.command }} + - {{ . | quote }} + {{- end }} + {{- end }} + {{- if .Values.args }} + args: + {{- range .Values.args }} + - {{ . | quote }} + {{- end }} + {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: config + mountPath: "/etc/grafana/grafana.ini" + subPath: grafana.ini + {{- if .Values.ldap.enabled }} + - name: ldap + mountPath: "/etc/grafana/ldap.toml" + subPath: ldap.toml + {{- end }} + {{- range .Values.extraConfigmapMounts }} + - name: {{ tpl .name $root }} + mountPath: {{ tpl .mountPath $root }} + subPath: {{ tpl (.subPath | default "") $root }} + readOnly: {{ .readOnly }} + {{- end }} + - name: storage + mountPath: "/var/lib/grafana" + {{- with .Values.persistence.subPath }} + subPath: {{ tpl . $root }} + {{- end }} + {{- with .Values.dashboards }} + {{- range $provider, $dashboards := . 
}} + {{- range $key, $value := $dashboards }} + {{- if (or (hasKey $value "json") (hasKey $value "file")) }} + - name: dashboards-{{ $provider }} + mountPath: "/var/lib/grafana/dashboards/{{ $provider }}/{{ $key }}.json" + subPath: "{{ $key }}.json" + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.dashboardsConfigMaps }} + {{- range (keys . | sortAlpha) }} + - name: dashboards-{{ . }} + mountPath: "/var/lib/grafana/dashboards/{{ . }}" + {{- end }} + {{- end }} + {{- with .Values.datasources }} + {{- $datasources := . }} + {{- range (keys . | sortAlpha) }} + {{- if (or (hasKey (index $datasources .) "secret")) }} {{/*check if current datasource should be handeled as secret */}} + - name: config-secret + mountPath: "/etc/grafana/provisioning/datasources/{{ . }}" + subPath: {{ . | quote }} + {{- else }} + - name: config + mountPath: "/etc/grafana/provisioning/datasources/{{ . }}" + subPath: {{ . | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.notifiers }} + {{- $notifiers := . }} + {{- range (keys . | sortAlpha) }} + {{- if (or (hasKey (index $notifiers .) "secret")) }} {{/*check if current notifier should be handeled as secret */}} + - name: config-secret + mountPath: "/etc/grafana/provisioning/notifiers/{{ . }}" + subPath: {{ . | quote }} + {{- else }} + - name: config + mountPath: "/etc/grafana/provisioning/notifiers/{{ . }}" + subPath: {{ . | quote }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.alerting }} + {{- $alertingmap := .}} + {{- range (keys . | sortAlpha) }} + {{- if (or (hasKey (index $.Values.alerting .) "secret") (hasKey (index $.Values.alerting .) "secretFile")) }} {{/*check if current alerting entry should be handeled as secret */}} + - name: config-secret + mountPath: "/etc/grafana/provisioning/alerting/{{ . }}" + subPath: {{ . | quote }} + {{- else }} + - name: config + mountPath: "/etc/grafana/provisioning/alerting/{{ . }}" + subPath: {{ . 
| quote }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.dashboardProviders }} + {{- range (keys . | sortAlpha) }} + - name: config + mountPath: "/etc/grafana/provisioning/dashboards/{{ . }}" + subPath: {{ . | quote }} + {{- end }} + {{- end }} + {{- with .Values.sidecar.alerts.enabled }} + - name: sc-alerts-volume + mountPath: "/etc/grafana/provisioning/alerting" + {{- end}} + {{- if .Values.sidecar.dashboards.enabled }} + - name: sc-dashboard-volume + mountPath: {{ .Values.sidecar.dashboards.folder | quote }} + {{- if .Values.sidecar.dashboards.SCProvider }} + - name: sc-dashboard-provider + mountPath: "/etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml" + subPath: provider.yaml + {{- end}} + {{- end}} + {{- if .Values.sidecar.datasources.enabled }} + - name: sc-datasources-volume + mountPath: "/etc/grafana/provisioning/datasources" + {{- end}} + {{- if .Values.sidecar.plugins.enabled }} + - name: sc-plugins-volume + mountPath: "/etc/grafana/provisioning/plugins" + {{- end}} + {{- if .Values.sidecar.notifiers.enabled }} + - name: sc-notifiers-volume + mountPath: "/etc/grafana/provisioning/notifiers" + {{- end}} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + readOnly: {{ .readOnly }} + subPath: {{ .subPath | default "" }} + {{- end }} + {{- range .Values.extraVolumeMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath | default "" }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.extraEmptyDirMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + ports: + - name: {{ .Values.podPortName }} + containerPort: {{ .Values.service.targetPort }} + protocol: TCP + - name: {{ .Values.gossipPortName }}-tcp + containerPort: 9094 + protocol: TCP + - name: {{ .Values.gossipPortName }}-udp + containerPort: 9094 + protocol: UDP + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + {{- if and (not 
.Values.env.GF_SECURITY_ADMIN_USER) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.userKey | default "admin-user" }} + {{- end }} + {{- if and (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: {{ (tpl .Values.admin.existingSecret .) | default (include "grafana.fullname" .) }} + key: {{ .Values.admin.passwordKey | default "admin-password" }} + {{- end }} + {{- if .Values.plugins }} + - name: GF_INSTALL_PLUGINS + valueFrom: + configMapKeyRef: + name: {{ include "grafana.fullname" . }} + key: plugins + {{- end }} + {{- if .Values.smtp.existingSecret }} + - name: GF_SMTP_USER + valueFrom: + secretKeyRef: + name: {{ .Values.smtp.existingSecret }} + key: {{ .Values.smtp.userKey | default "user" }} + - name: GF_SMTP_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.smtp.existingSecret }} + key: {{ .Values.smtp.passwordKey | default "password" }} + {{- end }} + {{- if .Values.imageRenderer.enabled }} + - name: GF_RENDERING_SERVER_URL + value: http://{{ include "grafana.fullname" . }}-image-renderer.{{ include "grafana.namespace" . }}:{{ .Values.imageRenderer.service.port }}/render + - name: GF_RENDERING_CALLBACK_URL + value: {{ .Values.imageRenderer.grafanaProtocol }}://{{ include "grafana.fullname" . }}.{{ include "grafana.namespace" . 
}}:{{ .Values.service.port }}/{{ .Values.imageRenderer.grafanaSubPath }} + {{- end }} + - name: GF_PATHS_DATA + value: {{ (get .Values "grafana.ini").paths.data }} + - name: GF_PATHS_LOGS + value: {{ (get .Values "grafana.ini").paths.logs }} + - name: GF_PATHS_PLUGINS + value: {{ (get .Values "grafana.ini").paths.plugins }} + - name: GF_PATHS_PROVISIONING + value: {{ (get .Values "grafana.ini").paths.provisioning }} + {{- range $key, $value := .Values.envValueFrom }} + - name: {{ $key | quote }} + valueFrom: + {{- tpl (toYaml $value) $ | nindent 10 }} + {{- end }} + {{- range $key, $value := .Values.env }} + - name: "{{ tpl $key $ }}" + value: "{{ tpl (print $value) $ }}" + {{- end }} + {{- if or .Values.envFromSecret (or .Values.envRenderSecret .Values.envFromSecrets) .Values.envFromConfigMaps }} + envFrom: + {{- if .Values.envFromSecret }} + - secretRef: + name: {{ tpl .Values.envFromSecret . }} + {{- end }} + {{- if .Values.envRenderSecret }} + - secretRef: + name: {{ include "grafana.fullname" . }}-env + {{- end }} + {{- range .Values.envFromSecrets }} + - secretRef: + name: {{ tpl .name $ }} + optional: {{ .optional | default false }} + {{- if .prefix }} + prefix: {{ tpl .prefix $ }} + {{- end }} + {{- end }} + {{- range .Values.envFromConfigMaps }} + - configMapRef: + name: {{ tpl .name $ }} + optional: {{ .optional | default false }} + {{- if .prefix }} + prefix: {{ tpl .prefix $ }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.lifecycleHooks }} + lifecycle: + {{- tpl (toYaml .) $root | nindent 6 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} +{{- with .Values.extraContainers }} + {{- tpl . $ | nindent 2 }} +{{- end }} +nodeSelector: {{ include "linux-node-selector" . 
| nindent 2 }} +{{- with .Values.nodeSelector }} + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.affinity }} +affinity: + {{- tpl (toYaml .) $root | nindent 2 }} +{{- end }} +{{- with .Values.topologySpreadConstraints }} +topologySpreadConstraints: + {{- toYaml . | nindent 2 }} +{{- end }} +tolerations: {{ include "linux-node-tolerations" . | nindent 2 }} +{{- with .Values.tolerations }} + {{- toYaml . | nindent 2 }} +{{- end }} +volumes: + - name: config + configMap: + name: {{ include "grafana.fullname" . }} + {{- $createConfigSecret := eq (include "grafana.shouldCreateConfigSecret" .) "true" -}} + {{- if and .Values.createConfigmap $createConfigSecret }} + - name: config-secret + secret: + secretName: {{ include "grafana.fullname" . }}-config-secret + {{- end }} + {{- range .Values.extraConfigmapMounts }} + - name: {{ tpl .name $root }} + configMap: + name: {{ tpl .configMap $root }} + {{- with .items }} + items: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- if .Values.dashboards }} + {{- range (keys .Values.dashboards | sortAlpha) }} + - name: dashboards-{{ . }} + configMap: + name: {{ include "grafana.fullname" $ }}-dashboards-{{ . }} + {{- end }} + {{- end }} + {{- if .Values.dashboardsConfigMaps }} + {{- range $provider, $name := .Values.dashboardsConfigMaps }} + - name: dashboards-{{ $provider }} + configMap: + name: {{ tpl $name $root }} + {{- end }} + {{- end }} + {{- if .Values.ldap.enabled }} + - name: ldap + secret: + {{- if .Values.ldap.existingSecret }} + secretName: {{ .Values.ldap.existingSecret }} + {{- else }} + secretName: {{ include "grafana.fullname" . }} + {{- end }} + items: + - key: ldap-toml + path: ldap.toml + {{- end }} + {{- if and .Values.persistence.enabled (eq .Values.persistence.type "pvc") }} + - name: storage + persistentVolumeClaim: + claimName: {{ tpl (.Values.persistence.existingClaim | default (include "grafana.fullname" .)) . 
}} + {{- else if and .Values.persistence.enabled (has .Values.persistence.type $sts) }} + {{/* nothing */}} + {{- else }} + - name: storage + {{- if .Values.persistence.inMemory.enabled }} + emptyDir: + medium: Memory + {{- with .Values.persistence.inMemory.sizeLimit }} + sizeLimit: {{ . }} + {{- end }} + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- if .Values.sidecar.alerts.enabled }} + - name: sc-alerts-volume + emptyDir: + {{- with .Values.sidecar.alerts.sizeLimit }} + sizeLimit: {{ . }} + {{- else }} + {} + {{- end }} + {{- end }} + {{- if .Values.sidecar.dashboards.enabled }} + - name: sc-dashboard-volume + emptyDir: + {{- with .Values.sidecar.dashboards.sizeLimit }} + sizeLimit: {{ . }} + {{- else }} + {} + {{- end }} + {{- if .Values.sidecar.dashboards.SCProvider }} + - name: sc-dashboard-provider + configMap: + name: {{ include "grafana.fullname" . }}-config-dashboards + {{- end }} + {{- end }} + {{- if .Values.sidecar.datasources.enabled }} + - name: sc-datasources-volume + emptyDir: + {{- with .Values.sidecar.datasources.sizeLimit }} + sizeLimit: {{ . }} + {{- else }} + {} + {{- end }} + {{- end }} + {{- if .Values.sidecar.plugins.enabled }} + - name: sc-plugins-volume + emptyDir: + {{- with .Values.sidecar.plugins.sizeLimit }} + sizeLimit: {{ . }} + {{- else }} + {} + {{- end }} + {{- end }} + {{- if .Values.sidecar.notifiers.enabled }} + - name: sc-notifiers-volume + emptyDir: + {{- with .Values.sidecar.notifiers.sizeLimit }} + sizeLimit: {{ . }} + {{- else }} + {} + {{- end }} + {{- end }} + {{- range .Values.extraSecretMounts }} + {{- if .secretName }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + defaultMode: {{ .defaultMode }} + {{- with .items }} + items: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- else if .projected }} + - name: {{ .name }} + projected: + {{- toYaml .projected | nindent 6 }} + {{- else if .csi }} + - name: {{ .name }} + csi: + {{- toYaml .csi | nindent 6 }} + {{- end }} + {{- end }} + {{- range .Values.extraVolumes }} + - name: {{ .name }} + {{- if .existingClaim }} + persistentVolumeClaim: + claimName: {{ .existingClaim }} + {{- else if .hostPath }} + hostPath: + {{ toYaml .hostPath | nindent 6 }} + {{- else if .csi }} + csi: + {{- toYaml .csi | nindent 6 }} + {{- else if .configMap }} + configMap: + {{- toYaml .configMap | nindent 6 }} + {{- else if .emptyDir }} + emptyDir: + {{- toYaml .emptyDir | nindent 6 }} + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- range .Values.extraEmptyDirMounts }} + - name: {{ .name }} + emptyDir: {} + {{- end }} + {{- with .Values.extraContainerVolumes }} + {{- tpl (toYaml .) $root | nindent 2 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrole.yaml new file mode 100644 index 0000000000..3af4b62b63 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrole.yaml @@ -0,0 +1,25 @@ +{{- if and .Values.rbac.create (or (not .Values.rbac.namespaced) .Values.rbac.extraClusterRoleRules) (not .Values.rbac.useExistingClusterRole) }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "grafana.fullname" . 
}}-clusterrole
+{{- if or .Values.sidecar.dashboards.enabled .Values.rbac.extraClusterRoleRules .Values.sidecar.datasources.enabled .Values.sidecar.plugins.enabled .Values.sidecar.alerts.enabled .Values.sidecar.notifiers.enabled }}{{- /* Non-empty rules when any sidecar is enabled or extra rules are supplied; otherwise "rules: []" below. */}}
+rules:
+  {{- if or .Values.sidecar.dashboards.enabled .Values.sidecar.datasources.enabled .Values.sidecar.plugins.enabled .Values.sidecar.alerts.enabled .Values.sidecar.notifiers.enabled }}{{- /* Read access to ConfigMaps/Secrets for every sidecar; notifiers is listed because the pod template configures it with the same LABEL/RESOURCE settings as the others. */}}
+  - apiGroups: [""] # "" indicates the core API group
+    resources: ["configmaps", "secrets"]
+    verbs: ["get", "watch", "list"]
+  {{- end}}
+  {{- with .Values.rbac.extraClusterRoleRules }}
+  {{- toYaml . | nindent 2 }}
+  {{- end}}
+{{- else }}
+rules: []
+{{- end}}
+{{- end}}
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrolebinding.yaml
new file mode 100644
index 0000000000..bda9431a2c
--- /dev/null
+++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/clusterrolebinding.yaml
@@ -0,0 +1,24 @@
+{{- if and .Values.rbac.create (or (not .Values.rbac.namespaced) .Values.rbac.extraClusterRoleRules) }}{{- /* Binds the Grafana ServiceAccount to rbac.useExistingClusterRole when set, else to <fullname>-clusterrole. */}}
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ include "grafana.fullname" . }}-clusterrolebinding
+  labels:
+    {{- include "grafana.labels" . | nindent 4 }}
+  {{- with .Values.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "grafana.serviceAccountName" . }}
+    namespace: {{ include "grafana.namespace" . }}
+roleRef:
+  kind: ClusterRole
+  {{- if .Values.rbac.useExistingClusterRole }}
+  name: {{ .Values.rbac.useExistingClusterRole }}
+  {{- else }}
+  name: {{ include "grafana.fullname" .
}}-clusterrole
+  {{- end }}
+  apiGroup: rbac.authorization.k8s.io
+{{- end }}
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configSecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configSecret.yaml
new file mode 100644
index 0000000000..55574b9bbc
--- /dev/null
+++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configSecret.yaml
@@ -0,0 +1,43 @@
+{{- $createConfigSecret := eq (include "grafana.shouldCreateConfigSecret" .) "true" -}}
+{{- if and .Values.createConfigmap $createConfigSecret }}{{- /* Secret holding the datasources/notifiers/alerting entries that carry a "secret" or "secretFile" key; mounted by the pod template as the config-secret volume. */}}
+{{- $files := .Files }}
+{{- $root := . -}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: "{{ include "grafana.fullname" . }}-config-secret"
+  namespace: {{ include "grafana.namespace" . }}
+  labels:
+    {{- include "grafana.labels" . | nindent 4 }}
+  {{- with .Values.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+data:
+{{- range $key, $value := .Values.alerting }}
+  {{- if (hasKey $value "secretFile") }}
+  {{- $key | nindent 2 }}:
+  {{- $files.Get $value.secretFile | b64enc | nindent 4 }}
+  {{/* The raw file bytes are base64-encoded (no toYaml), so the decoded Secret value is exactly the file content. As of https://helm.sh/docs/chart_template_guide/accessing_files/ this will only work if you fork this chart and add files to it. */}}
+  {{- end }}
+{{- end }}
+stringData:
+{{- range $key, $value := .Values.datasources }}
+{{- if (hasKey $value "secret") }}
+{{- $key | nindent 2 }}: |
+  {{- tpl (toYaml $value.secret | nindent 4) $root }}
+{{- end }}
+{{- end }}
+{{- range $key, $value := .Values.notifiers }}
+{{- if (hasKey $value "secret") }}
+{{- $key | nindent 2 }}: |
+  {{- tpl (toYaml $value.secret | nindent 4) $root }}
+{{- end }}
+{{- end }}
+{{- range $key, $value := .Values.alerting }}
+{{ if (hasKey $value "secret") }}
+  {{- $key | nindent 2 }}: |
+  {{- tpl (toYaml $value.secret | nindent 4) $root }}
+  {{- end }}
+{{- end }}
+{{- end }}
diff --git
a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap-dashboard-provider.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap-dashboard-provider.yaml new file mode 100644 index 0000000000..b412c4d1f0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap-dashboard-provider.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.sidecar.dashboards.enabled .Values.sidecar.dashboards.SCProvider }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "grafana.fullname" . }}-config-dashboards + namespace: {{ include "grafana.namespace" . }} +data: + {{- include "grafana.configDashboardProviderData" . | nindent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap.yaml new file mode 100644 index 0000000000..7d7428be51 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/configmap.yaml @@ -0,0 +1,15 @@ +{{- if .Values.createConfigmap }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +data: + {{- include "grafana.configData" . 
| nindent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/dashboards-json-configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/dashboards-json-configmap.yaml new file mode 100644 index 0000000000..b96ce72026 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/dashboards-json-configmap.yaml @@ -0,0 +1,38 @@ +{{- if .Values.dashboards }} +{{ $files := .Files }} +{{- range $provider, $dashboards := .Values.dashboards }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "grafana.fullname" $ }}-dashboards-{{ $provider }} + namespace: {{ include "grafana.namespace" $ }} + labels: + {{- include "grafana.labels" $ | nindent 4 }} + dashboard-provider: {{ $provider }} + {{- if $.Values.sidecar.dashboards.enabled }} + {{ $.Values.sidecar.dashboards.label }}: {{ $.Values.sidecar.dashboards.labelValue | quote }} + {{- end }} +{{- if $dashboards }} +data: +{{- $dashboardFound := false }} +{{- range $key, $value := $dashboards }} +{{- if (or (hasKey $value "json") (hasKey $value "file")) }} +{{- $dashboardFound = true }} + {{- print $key | nindent 2 }}.json: + {{- if hasKey $value "json" }} + |- + {{- $value.json | nindent 6 }} + {{- end }} + {{- if hasKey $value "file" }} + {{- toYaml ( $files.Get $value.file ) | nindent 4}} + {{- end }} +{{- end }} +{{- end }} +{{- if not $dashboardFound }} + {} +{{- end }} +{{- end }} +--- +{{- end }} + +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/deployment.yaml new file mode 100644 index 0000000000..46c016faa3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/deployment.yaml @@ -0,0 +1,53 @@ +{{- if (and (not .Values.useStatefulSet) (or (not .Values.persistence.enabled) (eq .Values.persistence.type "pvc"))) }} 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and (not .Values.autoscaling.enabled) (.Values.replicas) }} + replicas: {{ .Values.replicas }} + {{- end }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 6 }} + {{- with .Values.deploymentStrategy }} + strategy: + {{- toYaml . | trim | nindent 4 }} + {{- end }} + template: + metadata: + labels: + {{- include "grafana.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + checksum/config: {{ include "grafana.configData" . | sha256sum }} + {{- if .Values.dashboards }} + checksum/dashboards-json-config: {{ include (print $.Template.BasePath "/dashboards-json-configmap.yaml") . | sha256sum }} + {{- end }} + checksum/sc-dashboard-provider-config: {{ include "grafana.configDashboardProviderData" . | sha256sum }} + {{- if and (or (and (not .Values.admin.existingSecret) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD)) (and .Values.ldap.enabled (not .Values.ldap.existingSecret))) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + checksum/secret: {{ include "grafana.secretsData" . | sha256sum }} + {{- end }} + {{- if .Values.envRenderSecret }} + checksum/secret-env: {{ tpl (toYaml .Values.envRenderSecret) . | sha256sum }} + {{- end }} + kubectl.kubernetes.io/default-container: {{ .Chart.Name }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- include "grafana.pod" . 
| nindent 6 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/extra-manifests.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/extra-manifests.yaml new file mode 100644 index 0000000000..a9bb3b6ba8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraObjects }} +--- +{{ tpl (toYaml .) $ }} +{{ end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/headless-service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/headless-service.yaml new file mode 100644 index 0000000000..3028589d32 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/headless-service.yaml @@ -0,0 +1,22 @@ +{{- $sts := list "sts" "StatefulSet" "statefulset" -}} +{{- if or .Values.headlessService (and .Values.persistence.enabled (not .Values.persistence.existingClaim) (has .Values.persistence.type $sts)) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "grafana.fullname" . }}-headless + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + clusterIP: None + selector: + {{- include "grafana.selectorLabels" . 
| nindent 4 }} + type: ClusterIP + ports: + - name: {{ .Values.gossipPortName }}-tcp + port: 9094 +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/hpa.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/hpa.yaml new file mode 100644 index 0000000000..46bbcb49a2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/hpa.yaml @@ -0,0 +1,52 @@ +{{- $sts := list "sts" "StatefulSet" "statefulset" -}} +{{- if .Values.autoscaling.enabled }} +apiVersion: {{ include "grafana.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + app.kubernetes.io/name: {{ include "grafana.name" . }} + helm.sh/chart: {{ include "grafana.chart" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + {{- if has .Values.persistence.type $sts }} + kind: StatefulSet + {{- else }} + kind: Deployment + {{- end }} + name: {{ include "grafana.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + {{- if eq (include "grafana.hpa.apiVersion" .) "autoscaling/v2beta1" }} + targetAverageUtilization: {{ .Values.autoscaling.targetMemory }} + {{- else }} + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemory }} + {{- end }} + {{- end }} + {{- if .Values.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + {{- if eq (include "grafana.hpa.apiVersion" .) 
"autoscaling/v2beta1" }} + targetAverageUtilization: {{ .Values.autoscaling.targetCPU }} + {{- else }} + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPU }} + {{- end }} + {{- end }} + {{- if .Values.autoscaling.behavior }} + behavior: {{ toYaml .Values.autoscaling.behavior | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-deployment.yaml new file mode 100644 index 0000000000..28231b803e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-deployment.yaml @@ -0,0 +1,131 @@ +{{ if .Values.imageRenderer.enabled }} +{{- $root := . -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.imageRenderer.labels" . | nindent 4 }} + {{- with .Values.imageRenderer.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.imageRenderer.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and (not .Values.imageRenderer.autoscaling.enabled) (.Values.imageRenderer.replicas) }} + replicas: {{ .Values.imageRenderer.replicas }} + {{- end }} + revisionHistoryLimit: {{ .Values.imageRenderer.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 6 }} + + {{- with .Values.imageRenderer.deploymentStrategy }} + strategy: + {{- toYaml . | trim | nindent 4 }} + {{- end }} + template: + metadata: + labels: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 8 }} + {{- with .Values.imageRenderer.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . 
| sha256sum }} + {{- with .Values.imageRenderer.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imageRenderer.schedulerName }} + schedulerName: "{{ . }}" + {{- end }} + {{- with .Values.imageRenderer.serviceAccountName }} + serviceAccountName: "{{ . }}" + {{- end }} + {{- with .Values.imageRenderer.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imageRenderer.hostAliases }} + hostAliases: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imageRenderer.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + {{- with .Values.imageRenderer.image.pullSecrets }} + imagePullSecrets: + {{- range . }} + - name: {{ tpl . $root }} + {{- end}} + {{- end }} + containers: + - name: {{ .Chart.Name }}-image-renderer + {{- /* include takes (name, data); the root context must be passed or rendering fails */ -}}{{- $registry := include "system_default_registry" . | default .Values.imageRenderer.image.registry -}} + {{- if .Values.imageRenderer.image.sha }} + image: "{{ $registry }}{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}@sha256:{{ .Values.imageRenderer.image.sha }}" + {{- else }} + image: "{{ $registry }}{{ .Values.imageRenderer.image.repository }}:{{ .Values.imageRenderer.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.imageRenderer.image.pullPolicy }} + {{- if .Values.imageRenderer.command }} + command: + {{- range .Values.imageRenderer.command }} + - {{ . 
}} + {{- end }} + {{- end}} + ports: + - name: {{ .Values.imageRenderer.service.portName }} + containerPort: {{ .Values.imageRenderer.service.targetPort }} + protocol: TCP + livenessProbe: + httpGet: + path: / + port: {{ .Values.imageRenderer.service.portName }} + env: + - name: HTTP_PORT + value: {{ .Values.imageRenderer.service.targetPort | quote }} + {{- if .Values.imageRenderer.serviceMonitor.enabled }} + - name: ENABLE_METRICS + value: "true" + {{- end }} + {{- range $key, $value := .Values.imageRenderer.envValueFrom }} + - name: {{ $key | quote }} + valueFrom: + {{- tpl (toYaml $value) $ | nindent 16 }} + {{- end }} + {{- range $key, $value := .Values.imageRenderer.env }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} + {{- with .Values.imageRenderer.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - mountPath: /tmp + name: image-renderer-tmpfs + {{- with .Values.imageRenderer.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.imageRenderer.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.imageRenderer.affinity }} + affinity: + {{- tpl (toYaml .) $root | nindent 8 }} + {{- end }} + {{- with .Values.imageRenderer.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: image-renderer-tmpfs + emptyDir: {} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-hpa.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-hpa.yaml new file mode 100644 index 0000000000..b0f0059b79 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-hpa.yaml @@ -0,0 +1,47 @@ +{{- if and .Values.imageRenderer.enabled .Values.imageRenderer.autoscaling.enabled }} +apiVersion: {{ include "grafana.hpa.apiVersion" . 
}} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer + namespace: {{ include "grafana.namespace" . }} + labels: + app.kubernetes.io/name: {{ include "grafana.name" . }}-image-renderer + helm.sh/chart: {{ include "grafana.chart" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "grafana.fullname" . }}-image-renderer + minReplicas: {{ .Values.imageRenderer.autoscaling.minReplicas }} + maxReplicas: {{ .Values.imageRenderer.autoscaling.maxReplicas }} + metrics: + {{- if .Values.imageRenderer.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + {{- if eq (include "grafana.hpa.apiVersion" .) "autoscaling/v2beta1" }} + targetAverageUtilization: {{ .Values.imageRenderer.autoscaling.targetMemory }} + {{- else }} + target: + type: Utilization + averageUtilization: {{ .Values.imageRenderer.autoscaling.targetMemory }} + {{- end }} + {{- end }} + {{- if .Values.imageRenderer.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + {{- if eq (include "grafana.hpa.apiVersion" .) 
"autoscaling/v2beta1" }} + targetAverageUtilization: {{ .Values.imageRenderer.autoscaling.targetCPU }} + {{- else }} + target: + type: Utilization + averageUtilization: {{ .Values.imageRenderer.autoscaling.targetCPU }} + {{- end }} + {{- end }} + {{- if .Values.imageRenderer.autoscaling.behavior }} + behavior: {{ toYaml .Values.imageRenderer.autoscaling.behavior | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-network-policy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-network-policy.yaml new file mode 100644 index 0000000000..d1a0eb313d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-network-policy.yaml @@ -0,0 +1,79 @@ +{{- if and .Values.imageRenderer.enabled .Values.imageRenderer.networkPolicy.limitIngress }} +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer-ingress + namespace: {{ include "grafana.namespace" . }} + annotations: + comment: Limit image-renderer ingress traffic from grafana +spec: + podSelector: + matchLabels: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 6 }} + {{- with .Values.imageRenderer.podLabels }} + {{- toYaml . | nindent 6 }} + {{- end }} + + policyTypes: + - Ingress + ingress: + - ports: + - port: {{ .Values.imageRenderer.service.targetPort }} + protocol: TCP + from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ include "grafana.namespace" . }} + podSelector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 14 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- with .Values.imageRenderer.networkPolicy.extraIngressSelectors -}} + {{ toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} + +{{- if and .Values.imageRenderer.enabled .Values.imageRenderer.networkPolicy.limitEgress }} +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer-egress + namespace: {{ include "grafana.namespace" . }} + annotations: + comment: Limit image-renderer egress traffic to grafana +spec: + podSelector: + matchLabels: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 6 }} + {{- with .Values.imageRenderer.podLabels }} + {{- toYaml . | nindent 6 }} + {{- end }} + + policyTypes: + - Egress + egress: + # allow dns resolution + - ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + # talk only to grafana; the namespace is matched on kubernetes.io/metadata.name, the same label the ingress policy above uses + - ports: + - port: {{ .Values.service.targetPort }} + protocol: TCP + to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ include "grafana.namespace" . }} + podSelector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 14 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 14 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-service.yaml new file mode 100644 index 0000000000..f8da127cf8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-service.yaml @@ -0,0 +1,31 @@ +{{- if and .Values.imageRenderer.enabled .Values.imageRenderer.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.imageRenderer.labels" . | nindent 4 }} + {{- with .Values.imageRenderer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.imageRenderer.service.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + type: ClusterIP + {{- with .Values.imageRenderer.service.clusterIP }} + clusterIP: {{ . }} + {{- end }} + ports: + - name: {{ .Values.imageRenderer.service.portName }} + port: {{ .Values.imageRenderer.service.port }} + protocol: TCP + targetPort: {{ .Values.imageRenderer.service.targetPort }} + {{- with .Values.imageRenderer.appProtocol }} + appProtocol: {{ . }} + {{- end }} + selector: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-servicemonitor.yaml new file mode 100644 index 0000000000..5d9f09d266 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/image-renderer-servicemonitor.yaml @@ -0,0 +1,48 @@ +{{- if .Values.imageRenderer.serviceMonitor.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "grafana.fullname" . }}-image-renderer + {{- if .Values.imageRenderer.serviceMonitor.namespace }} + namespace: {{ tpl .Values.imageRenderer.serviceMonitor.namespace . }} + {{- else }} + namespace: {{ include "grafana.namespace" . }} + {{- end }} + labels: + {{- include "grafana.imageRenderer.labels" . | nindent 4 }} + {{- with .Values.imageRenderer.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + endpoints: + - port: {{ .Values.imageRenderer.service.portName }} + {{- with .Values.imageRenderer.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.imageRenderer.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ . 
}} + {{- end }} + honorLabels: true + path: {{ .Values.imageRenderer.serviceMonitor.path }} + scheme: {{ .Values.imageRenderer.serviceMonitor.scheme }} + {{- with .Values.imageRenderer.serviceMonitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.imageRenderer.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + jobLabel: "{{ .Release.Name }}-image-renderer" + selector: + matchLabels: + {{- include "grafana.imageRenderer.selectorLabels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ include "grafana.namespace" . }} + {{- with .Values.imageRenderer.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/ingress.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/ingress.yaml new file mode 100644 index 0000000000..b2ffd81095 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/ingress.yaml @@ -0,0 +1,78 @@ +{{- if .Values.ingress.enabled -}} +{{- $ingressApiIsStable := eq (include "grafana.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "grafana.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "grafana.ingress.supportsPathType" .) "true" -}} +{{- $fullName := include "grafana.fullname" . -}} +{{- $servicePort := .Values.service.port -}} +{{- $ingressPath := .Values.ingress.path -}} +{{- $ingressPathType := .Values.ingress.pathType -}} +{{- $extraPaths := .Values.ingress.extraPaths -}} +apiVersion: {{ include "grafana.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.ingress.labels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + {{- with .Values.ingress.annotations }} + annotations: + {{- range $key, $value := . }} + {{ $key }}: {{ tpl $value $ | quote }} + {{- end }} + {{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.ingress.ingressClassName }} + ingressClassName: {{ .Values.ingress.ingressClassName }} + {{- end -}} + {{- with .Values.ingress.tls }} + tls: + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + rules: + {{- if .Values.ingress.hosts }} + {{- range .Values.ingress.hosts }} + - host: {{ tpl . $ | quote }} + http: + paths: + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: {{ $fullName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + {{- if $ingressApiIsStable }} + service: + name: {{ $fullName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- with $ingressPath }} + path: {{ . }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/networkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/networkpolicy.yaml new file mode 100644 index 0000000000..4cd3ed6976 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/networkpolicy.yaml @@ -0,0 +1,61 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . 
}} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + policyTypes: + {{- if .Values.networkPolicy.ingress }} + - Ingress + {{- end }} + {{- if .Values.networkPolicy.egress.enabled }} + - Egress + {{- end }} + podSelector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 6 }} + + {{- if .Values.networkPolicy.egress.enabled }} + egress: + {{- if not .Values.networkPolicy.egress.blockDNSResolution }} + - ports: + - port: 53 + protocol: UDP + {{- end }} + - ports: + {{ .Values.networkPolicy.egress.ports | toJson }} + {{- with .Values.networkPolicy.egress.to }} + to: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- end }} + {{- if .Values.networkPolicy.ingress }} + ingress: + - ports: + - port: {{ .Values.service.targetPort }} + {{- if not .Values.networkPolicy.allowExternal }} + from: + - podSelector: + matchLabels: + {{ include "grafana.fullname" . }}-client: "true" + {{- with .Values.networkPolicy.explicitNamespacesSelector }} + - namespaceSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + - podSelector: + matchLabels: + {{- include "grafana.labels" . | nindent 14 }} + role: read + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/nginx-config.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/nginx-config.yaml new file mode 100644 index 0000000000..557471f6ff --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/nginx-config.yaml @@ -0,0 +1,94 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-nginx-proxy-config + namespace: {{ template "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . 
| nindent 4 }} +data: + nginx.conf: |- + worker_processes auto; + error_log /dev/stdout warn; + pid /var/cache/nginx/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)'; + + proxy_connect_timeout 10; + proxy_read_timeout 180; + proxy_send_timeout 5; + proxy_buffering off; + proxy_cache_path /var/cache/nginx/cache levels=1:2 keys_zone=my_zone:100m inactive=1d max_size=10g; + + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + listen 8080; + access_log off; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 2; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + proxy_set_header Host $host; + + location /api/dashboards { + proxy_pass http://localhost:3000; + } + + location /api/search { + proxy_pass http://localhost:3000; + + sub_filter_types application/json; + sub_filter_once off; + } + + location /api/live/ { + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host $http_host; + proxy_pass http://localhost:3000; + } + + location / { + proxy_cache my_zone; + proxy_cache_valid 200 302 1d; + proxy_cache_valid 301 30d; + proxy_cache_valid any 5m; + proxy_cache_bypass $http_cache_control; + add_header X-Proxy-Cache $upstream_cache_status; + add_header Cache-Control "public"; + + proxy_pass http://localhost:3000/; + + sub_filter_once off; + + {{- if eq .Values.global.cattle.clusterId "local" -}} + sub_filter '"appSubUrl":""' '"appSubUrl":"/api/v1/namespaces/{{ template "grafana.namespace" . }}/services/http:{{ template "grafana.fullname" . 
}}:{{ .Values.service.port }}/proxy"'; + {{- else -}} + sub_filter '"appSubUrl":""' '"appSubUrl":"/k8s/clusters/{{ .Values.global.cattle.clusterId }}/api/v1/namespaces/{{ template "grafana.namespace" . }}/services/http:{{ template "grafana.fullname" . }}:{{ .Values.service.port }}/proxy"'; + {{- end -}} + + sub_filter ':"/avatar/' ':"avatar/'; + + if ($request_filename ~ .*\.(?:js|css|jpg|jpeg|gif|png|ico|cur|gz|svg|svgz|mp4|ogg|ogv|webm)$) { + expires 90d; + } + + rewrite ^/k8s/clusters/.*/proxy(.*) /$1 break; + + } + } + } diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/poddisruptionbudget.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/poddisruptionbudget.yaml new file mode 100644 index 0000000000..05251214ac --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/poddisruptionbudget.yaml @@ -0,0 +1,22 @@ +{{- if .Values.podDisruptionBudget }} +apiVersion: {{ include "grafana.podDisruptionBudget.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ . }} + {{- end }} + {{- with .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ . }} + {{- end }} + selector: + matchLabels: + {{- include "grafana.selectorLabels" . 
| nindent 6 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/podsecuritypolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/podsecuritypolicy.yaml new file mode 100644 index 0000000000..973caccd57 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/podsecuritypolicy.yaml @@ -0,0 +1,45 @@ +{{- if and (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ include "grafana.fullname" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} +{{- if .Values.rbac.pspAnnotations }} + annotations: {{ toYaml .Values.rbac.pspAnnotations | nindent 4 }} +{{- end }} +spec: + privileged: false + allowPrivilegeEscalation: false + requiredDropCapabilities: + # Default set from Docker, with DAC_OVERRIDE and CHOWN + - ALL + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'csi' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. 
+ - min: 1 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/pvc.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/pvc.yaml new file mode 100644 index 0000000000..c9b234305f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/pvc.yaml @@ -0,0 +1,41 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) (eq .Values.persistence.type "pvc")}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.persistence.extraPvcLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.persistence.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.persistence.finalizers }} + finalizers: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + accessModes: +{{- $_ := required "Must provide at least one access mode for persistent volumes used by Grafana" .Values.persistence.accessModes }} +{{- $_ := required "Must provide at least one access mode for persistent volumes used by Grafana" (first .Values.persistence.accessModes) }} + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if (lookup "v1" "PersistentVolumeClaim" (include "grafana.namespace" .) (include "grafana.fullname" .)) }} + volumeName: {{ (lookup "v1" "PersistentVolumeClaim" (include "grafana.namespace" .) (include "grafana.fullname" .)).spec.volumeName }} + {{- end }} + {{- with .Values.persistence.storageClassName }} + storageClassName: {{ . }} + {{- end }} + {{- with .Values.persistence.selectorLabels }} + selector: + matchLabels: + {{- toYaml . 
| nindent 6 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/role.yaml new file mode 100644 index 0000000000..469b6f4e6c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/role.yaml @@ -0,0 +1,32 @@ +{{- if and .Values.rbac.create (not .Values.rbac.useExistingRole) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- if or (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) (and .Values.rbac.namespaced (or .Values.sidecar.dashboards.enabled .Values.sidecar.datasources.enabled .Values.sidecar.plugins.enabled .Values.rbac.extraRoleRules)) }} +rules: + {{- if and (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} + - apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: [{{ include "grafana.fullname" . }}] + {{- end }} + {{- if and .Values.rbac.namespaced (or .Values.sidecar.dashboards.enabled .Values.sidecar.datasources.enabled .Values.sidecar.plugins.enabled) }} + - apiGroups: [""] # "" indicates the core API group + resources: ["configmaps", "secrets"] + verbs: ["get", "watch", "list"] + {{- end }} + {{- with .Values.rbac.extraRoleRules }} + {{- toYaml . 
| nindent 2 }} + {{- end}} +{{- else }} +rules: [] +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/rolebinding.yaml new file mode 100644 index 0000000000..58f77c6b0b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/rolebinding.yaml @@ -0,0 +1,25 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + {{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} + {{- else }} + name: {{ include "grafana.fullname" . }} + {{- end }} +subjects: +- kind: ServiceAccount + name: {{ include "grafana.serviceAccountName" . }} + namespace: {{ include "grafana.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret-env.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret-env.yaml new file mode 100644 index 0000000000..eb14aac707 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret-env.yaml @@ -0,0 +1,14 @@ +{{- if .Values.envRenderSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "grafana.fullname" . }}-env + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . 
| nindent 4 }} +type: Opaque +data: +{{- range $key, $val := .Values.envRenderSecret }} + {{ $key }}: {{ tpl ($val | toString) $ | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret.yaml new file mode 100644 index 0000000000..fd2ca50f4b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/secret.yaml @@ -0,0 +1,16 @@ +{{- if or (and (not .Values.admin.existingSecret) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION)) (and .Values.ldap.enabled (not .Values.ldap.existingSecret)) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +type: Opaque +data: + {{- include "grafana.secretsData" . | nindent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/service.yaml new file mode 100644 index 0000000000..e9396a15c6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/service.yaml @@ -0,0 +1,61 @@ +{{- if .Values.service.enabled }} +{{- $root := . }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.service.annotations }} + annotations: + {{- tpl (toYaml . 
| nindent 4) $root }} + {{- end }} +spec: + {{- if (or (eq .Values.service.type "ClusterIP") (empty .Values.service.type)) }} + type: ClusterIP + {{- with .Values.service.clusterIP }} + clusterIP: {{ . }} + {{- end }} + {{- else if eq .Values.service.type "LoadBalancer" }} + type: LoadBalancer + {{- with .Values.service.loadBalancerIP }} + loadBalancerIP: {{ . }} + {{- end }} + {{- with .Values.service.loadBalancerClass }} + loadBalancerClass: {{ . }} + {{- end }} + {{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- else }} + type: {{ .Values.service.type }} + {{- end }} + {{- with .Values.service.externalIPs }} + externalIPs: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.service.externalTrafficPolicy }} + externalTrafficPolicy: {{ . }} + {{- end }} + ports: + - name: {{ .Values.service.portName }} + port: {{ .Values.service.port }} + protocol: TCP + targetPort: {{ .Values.service.targetPort }} + {{- with .Values.service.appProtocol }} + appProtocol: {{ . }} + {{- end }} + {{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort))) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + {{- with .Values.extraExposePorts }} + {{- tpl (toYaml . | nindent 4) $root }} + {{- end }} + selector: + {{- include "grafana.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/serviceaccount.yaml new file mode 100644 index 0000000000..ffca0717ae --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: {{ .Values.serviceAccount.autoMount | default .Values.serviceAccount.automountServiceAccountToken }} +metadata: + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} + name: {{ include "grafana.serviceAccountName" . }} + namespace: {{ include "grafana.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/servicemonitor.yaml new file mode 100644 index 0000000000..b321b1269c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/servicemonitor.yaml @@ -0,0 +1,68 @@ +{{- if .Values.serviceMonitor.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "grafana.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ tpl .Values.serviceMonitor.namespace . }} + {{- else }} + namespace: {{ include "grafana.namespace" . }} + {{- end }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.labels }} + {{- tpl (toYaml . 
| nindent 4) $ }} + {{- end }} +spec: + endpoints: + - port: {{ .Values.service.portName }} + {{- with .Values.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + honorLabels: true + path: {{ .Values.serviceMonitor.path }} + scheme: {{ .Values.serviceMonitor.scheme }} + {{- with .Values.serviceMonitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 6 }} + {{- end }} + metricRelabelings: + {{- if .Values.serviceMonitor.metricRelabelings }} + {{- toYaml .Values.serviceMonitor.metricRelabelings | nindent 6 }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName }} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} + {{- if .Values.serviceMonitor.relabelings }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- end }} + {{- /* + metricRelabelings is rendered once, above: user-supplied entries followed by the + cattle cluster_id / cluster_name entries; emitting the key again would duplicate it + */}} + jobLabel: "{{ .Release.Name }}" + selector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ include "grafana.namespace" . }} + {{- with .Values.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml .
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/statefulset.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/statefulset.yaml new file mode 100644 index 0000000000..49278083e8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/statefulset.yaml @@ -0,0 +1,58 @@ +{{- $sts := list "sts" "StatefulSet" "statefulset" -}} +{{- if (or (.Values.useStatefulSet) (and .Values.persistence.enabled (not .Values.persistence.existingClaim) (has .Values.persistence.type $sts)))}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "grafana.fullname" . }} + namespace: {{ include "grafana.namespace" . }} + labels: + {{- include "grafana.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "grafana.selectorLabels" . | nindent 6 }} + serviceName: {{ include "grafana.fullname" . }}-headless + template: + metadata: + labels: + {{- include "grafana.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/dashboards-json-config: {{ include (print $.Template.BasePath "/dashboards-json-configmap.yaml") . | sha256sum }} + checksum/sc-dashboard-provider-config: {{ include (print $.Template.BasePath "/configmap-dashboard-provider.yaml") . | sha256sum }} + {{- if and (or (and (not .Values.admin.existingSecret) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD__FILE) (not .Values.env.GF_SECURITY_ADMIN_PASSWORD)) (and .Values.ldap.enabled (not .Values.ldap.existingSecret))) (not .Values.env.GF_SECURITY_DISABLE_INITIAL_ADMIN_CREATION) }} + checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . 
| sha256sum }} + {{- end }} + kubectl.kubernetes.io/default-container: {{ .Chart.Name }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- include "grafana.pod" . | nindent 6 }} + {{- if .Values.persistence.enabled}} + volumeClaimTemplates: + - metadata: + name: storage + spec: +{{- $_ := required "Must provide at least one access mode for persistent volumes used by Grafana" .Values.persistence.accessModes }} +{{- $_ := required "Must provide at least one access mode for persistent volumes used by Grafana" (first .Values.persistence.accessModes) }} + accessModes: {{ .Values.persistence.accessModes | toJson }}{{/* toJson keeps a multi-entry list valid YAML; a bare Go list prints as [a b], which YAML reads as one string */}} + storageClassName: {{ .Values.persistence.storageClassName }} + resources: + requests: + storage: {{ required "Must provide size for persistent volumes used by Grafana" .Values.persistence.size }} + {{- with .Values.persistence.selectorLabels }} + selector: + matchLabels: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-configmap.yaml new file mode 100644 index 0000000000..01c96c9243 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-configmap.yaml @@ -0,0 +1,20 @@ +{{- if .Values.testFramework.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "grafana.fullname" . }}-test + namespace: {{ include "grafana.namespace" . }} + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" + labels: + {{- include "grafana.labels" . | nindent 4 }} +data: + run.sh: |- + @test "Test Health" { + url="http://{{ include "grafana.fullname" .
}}/api/health" + + code=$(wget --server-response --spider --timeout 90 --tries 10 ${url} 2>&1 | awk '/^ HTTP/{print $2}') + [ "$code" == "200" ] + } +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-podsecuritypolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-podsecuritypolicy.yaml new file mode 100644 index 0000000000..70a0a884c9 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-podsecuritypolicy.yaml @@ -0,0 +1,32 @@ +{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") .Values.testFramework.enabled (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ include "grafana.fullname" . }}-test + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" + labels: + {{- include "grafana.labels" . 
| nindent 4 }} +spec: + allowPrivilegeEscalation: true + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + fsGroup: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + volumes: + - configMap + - downwardAPI + - emptyDir + - projected + - csi + - secret +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-role.yaml new file mode 100644 index 0000000000..976418b137 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-role.yaml @@ -0,0 +1,17 @@ +{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") .Values.testFramework.enabled (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "grafana.fullname" . }}-test + namespace: {{ include "grafana.namespace" . }} + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" + labels: + {{- include "grafana.labels" . | nindent 4 }} +rules: + - apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: [{{ include "grafana.fullname" . 
}}-test] +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-rolebinding.yaml new file mode 100644 index 0000000000..509566eccd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-rolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") .Values.testFramework.enabled (or .Values.global.cattle.psp.enabled .Values.rbac.pspEnabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "grafana.fullname" . }}-test + namespace: {{ include "grafana.namespace" . }} + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" + labels: + {{- include "grafana.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "grafana.fullname" . }}-test +subjects: + - kind: ServiceAccount + name: {{ include "grafana.serviceAccountNameTest" . }} + namespace: {{ include "grafana.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-serviceaccount.yaml new file mode 100644 index 0000000000..38fba3596a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test-serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if and .Values.testFramework.enabled .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "grafana.labels" . | nindent 4 }} + name: {{ include "grafana.serviceAccountNameTest" . }} + namespace: {{ include "grafana.namespace" . 
}} + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test.yaml new file mode 100644 index 0000000000..83aaa185c2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/templates/tests/test.yaml @@ -0,0 +1,53 @@ +{{- if .Values.testFramework.enabled }} +{{- $root := . }} +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "grafana.fullname" . }}-test + labels: + {{- include "grafana.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test-success + "helm.sh/hook-delete-policy": "before-hook-creation,hook-succeeded" + namespace: {{ include "grafana.namespace" . }} +spec: + serviceAccountName: {{ include "grafana.serviceAccountNameTest" . }} + {{- with .Values.testFramework.securityContext }} + securityContext: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- if or .Values.image.pullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "grafana.imagePullSecrets" (dict "root" $root "imagePullSecrets" .Values.image.pullSecrets) | nindent 4 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- tpl (toYaml .) $root | nindent 4 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 4 }} + {{- end }} + containers: + - name: {{ .Release.Name }}-test + image: "{{ template "system_default_registry" . 
| default .Values.testFramework.image.registry }}/{{ .Values.testFramework.image.repository }}:{{ .Values.testFramework.image.tag }}" + imagePullPolicy: "{{ .Values.testFramework.imagePullPolicy}}" + command: ["/opt/bats/bin/bats", "-t", "/tests/run.sh"] + volumeMounts: + - mountPath: /tests + name: tests + readOnly: true + {{- with .Values.testFramework.resources }} + resources: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: tests + configMap: + name: {{ include "grafana.fullname" . }}-test + restartPolicy: Never +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/values.yaml new file mode 100644 index 0000000000..2bb62aec39 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/grafana/values.yaml @@ -0,0 +1,1315 @@ +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # Can be templated. + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + +rbac: + create: true + ## Use an existing ClusterRole/Role (depending on rbac.namespaced false/true) + # useExistingRole: name-of-some-role + # useExistingClusterRole: name-of-some-clusterRole + pspEnabled: false + pspUseAppArmor: false + namespaced: false + extraRoleRules: [] + # - apiGroups: [] + # resources: [] + # verbs: [] + extraClusterRoleRules: [] + # - apiGroups: [] + # resources: [] + # verbs: [] +serviceAccount: + create: true + name: + nameTest: + ## ServiceAccount labels. + labels: {} + ## Service account annotations. Can be templated.
+ # annotations: + # eks.amazonaws.com/role-arn: arn:aws:iam::123456789000:role/iam-role-name-here + + ## autoMount is deprecated in favor of automountServiceAccountToken + # autoMount: false + automountServiceAccountToken: true + +replicas: 1 + +## Create a headless service for the deployment +headlessService: false + +## Should the service account be auto mounted on the pod +automountServiceAccountToken: true + +## Create HorizontalPodAutoscaler object for deployment type +# +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 5 + targetCPU: "60" + targetMemory: "" + behavior: {} + +## See `kubectl explain poddisruptionbudget.spec` for more +## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +podDisruptionBudget: {} +# apiVersion: "" +# minAvailable: 1 +# maxUnavailable: 1 + +## See `kubectl explain deployment.spec.strategy` for more +## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy +deploymentStrategy: + type: RollingUpdate + +readinessProbe: + httpGet: + path: /api/health + port: 3000 + +livenessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 60 + timeoutSeconds: 30 + failureThreshold: 10 + +## Use an alternate scheduler, e.g. "stork". +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +# schedulerName: "default-scheduler" + +image: + repository: rancher/mirrored-grafana-grafana + # Overrides the Grafana image tag whose default is the chart appVersion + tag: 10.3.3 + sha: "" + pullPolicy: IfNotPresent + + ## Optionally specify an array of imagePullSecrets. + ## Secrets must be manually created in the namespace. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Can be templated. 
+ ## + pullSecrets: [] + # - myRegistrKeySecretName + +testFramework: + enabled: false + imagePullPolicy: IfNotPresent + securityContext: + runAsNonRoot: true + runAsUser: 1000 + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# dns configuration for pod +dnsPolicy: ~ +dnsConfig: {} + # nameservers: + # - 8.8.8.8 + # options: + # - name: ndots + # value: "2" + # - name: edns0 + +securityContext: + runAsNonRoot: true + runAsUser: 472 + runAsGroup: 472 + fsGroup: 472 + +containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +# Enable creating the grafana configmap +createConfigmap: true + +# Extra configmaps to mount in grafana pods +# Values are templated. +extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /etc/grafana/ssl/ + # subPath: certificates.crt # (optional) + # configMap: certs-configmap + # readOnly: true + + +extraEmptyDirMounts: [] + # - name: provisioning-notifiers + # mountPath: /etc/grafana/provisioning/notifiers + + +# Apply extra labels to common labels. +extraLabels: {} + +## Assign a PriorityClassName to pods if set +# priorityClassName: + +downloadDashboardsImage: + repository: rancher/mirrored-curlimages-curl + tag: 7.85.0 + sha: "" + pullPolicy: IfNotPresent + +downloadDashboards: + env: {} + envFromSecret: "" + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + envValueFrom: {} + # ENV_NAME: + # configMapKeyRef: + # name: configmap-name + # key: value_key + +## Pod Annotations +# podAnnotations: {} + +## Pod Labels +# podLabels: {} + +podPortName: grafana +gossipPortName: gossip +## Deployment annotations +# annotations: {} + +## Expose the grafana service to be accessed from outside the cluster (LoadBalancer service). +## or access it from within the cluster (ClusterIP service). 
Set the service type and the port to serve it. +## ref: http://kubernetes.io/docs/user-guide/services/ +## +service: + enabled: true + type: ClusterIP + loadBalancerIP: "" + loadBalancerClass: "" + loadBalancerSourceRanges: [] + port: 80 + targetPort: 3000 + # targetPort: 4181 To be used with a proxy extraContainer + ## Service annotations. Can be templated. + annotations: {} + labels: {} + portName: service + # Adds the appProtocol field to the service. This allows to work with istio protocol selection. Ex: "http" or "tcp" + appProtocol: "" + +serviceMonitor: + ## If true, a ServiceMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + enabled: false + path: /metrics + # namespace: monitoring (defaults to use the namespace this chart is deployed to) + labels: {} + interval: 30s + scheme: http + tlsConfig: {} + scrapeTimeout: 30s + relabelings: [] + metricRelabelings: [] + targetLabels: [] + +extraExposePorts: [] + # - name: keycloak + # port: 8080 + # targetPort: 8080 + +# overrides pod.spec.hostAliases in the grafana deployment's pods +hostAliases: [] + # - ip: "1.2.3.4" + # hostnames: + # - "my.host.com" + +ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: / + + # pathType is only for k8s >= 1.18 + pathType: Prefix + + hosts: + - chart-example.local + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: use-annotation + + + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} +# limits: +# cpu: 100m +# memory: 128Mi +# requests: +# cpu: 100m +# memory: 128Mi + +## Node labels for pod assignment +## ref: https://kubernetes.io/docs/user-guide/node-selection/ +# +nodeSelector: {} + +## Tolerations for pod assignment +## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +## +tolerations: [] + +## Affinity for pod assignment (evaluated as template) +## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +## +affinity: {} + +## Topology Spread Constraints +## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +## +topologySpreadConstraints: [] + +## Additional init containers (evaluated as template) +## ref: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ +## +extraInitContainers: [] + +## Specify additional containers in extraContainers.
This is meant to allow adding an authentication proxy to a grafana pod +extraContainers: "" +# extraContainers: | +# - name: proxy +# image: quay.io/gambol99/keycloak-proxy:latest +# args: +# - -provider=github +# - -client-id= +# - -client-secret= +# - -github-org= +# - -email-domain=* +# - -cookie-secret= +# - -http-address=http://0.0.0.0:4181 +# - -upstream-url=http://127.0.0.1:3000 +# ports: +# - name: proxy-web +# containerPort: 4181 + +## Volumes that can be used in init containers that will not be mounted to deployment pods +extraContainerVolumes: [] +# - name: volume-from-secret +# secret: +# secretName: secret-to-mount +# - name: empty-dir-volume +# emptyDir: {} + +## Enable persistence using Persistent Volume Claims +## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ +## +persistence: + type: pvc + enabled: false + # storageClassName: default + accessModes: + - ReadWriteOnce + size: 10Gi + # annotations: {} + finalizers: + - kubernetes.io/pvc-protection + # selectorLabels: {} + ## Sub-directory of the PV to mount. Can be templated. + # subPath: "" + ## Name of an existing PVC. Can be templated. + # existingClaim: + ## Extra labels to apply to a PVC. 
+ extraPvcLabels: {} + + ## If persistence is not enabled, this allows to mount the + ## local storage in-memory to improve performance + ## + inMemory: + enabled: false + ## The maximum usage on memory medium EmptyDir would be + ## the minimum value between the SizeLimit specified + ## here and the sum of memory limits of all containers in a pod + ## + # sizeLimit: 300Mi + +initChownData: + ## If false, data ownership will not be reset at startup + ## This allows the grafana-server to be run with an arbitrary user + ## + enabled: true + + ## initChownData container image + ## + image: + repository: rancher/mirrored-library-busybox + tag: "1.31.1" + sha: "" + pullPolicy: IfNotPresent + + ## initChownData resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: {} + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + securityContext: + runAsNonRoot: false + runAsUser: 0 + seccompProfile: + type: RuntimeDefault + capabilities: + add: + - CHOWN + +# Administrator credentials when not using an existing secret (see below) +adminUser: admin +# adminPassword: strongpassword + +# Use an existing secret for the admin user. +admin: + ## Name of the secret. Can be templated. + existingSecret: "" + userKey: admin-user + passwordKey: admin-password + +## Define command to be executed at startup by grafana container +## Needed if using `vault-env` to manage secrets (ref: https://banzaicloud.com/blog/inject-secrets-into-pods-vault/) +## Default is "run.sh" as defined in grafana's Dockerfile +# command: +# - "sh" +# - "/run.sh" + +## Optionally define args if command is used +## Needed if using `hashicorp/envconsul` to manage secrets +## By default no arguments are set +# args: +# - "-secret" +# - "secret/grafana" +# - "./grafana" + +## Extra environment variables that will be passed on to deployment pods +## +## to provide grafana with access to CloudWatch on AWS EKS: +## 1.
create an iam role of type "Web identity" with provider oidc.eks.* (note the provider for later) +## 2. edit the "Trust relationships" of the role, add a line inside the StringEquals clause using the +## same oidc eks provider as noted before (same as the existing line) +## also, replace NAMESPACE and prometheus-operator-grafana with the service account namespace and name +## +## "oidc.eks.us-east-1.amazonaws.com/id/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX:sub": "system:serviceaccount:NAMESPACE:prometheus-operator-grafana", +## +## 3. attach a policy to the role, you can use a built in policy called CloudWatchReadOnlyAccess +## 4. use the following env: (replace 123456789000 and iam-role-name-here with your aws account number and role name) +## +## env: +## AWS_ROLE_ARN: arn:aws:iam::123456789000:role/iam-role-name-here +## AWS_WEB_IDENTITY_TOKEN_FILE: /var/run/secrets/eks.amazonaws.com/serviceaccount/token +## AWS_REGION: us-east-1 +## +## 5. uncomment the EKS section in extraSecretMounts: below +## 6. uncomment the annotation section in the serviceAccount: above +## make sure to replace arn:aws:iam::123456789000:role/iam-role-name-here with your role arn + +env: {} + +## "valueFrom" environment variable references that will be added to deployment pods. Name is templated. +## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvarsource-v1-core +## Renders in container spec as: +## env: +## ... +## - name: +## valueFrom: +## +envValueFrom: {} + # ENV_NAME: + # configMapKeyRef: + # name: configmap-name + # key: value_key + +## The name of a secret in the same kubernetes namespace which contain values to be added to the environment +## This can be useful for auth tokens, etc. Value is templated. +envFromSecret: "" + +## Sensitive environment variables that will be rendered as new secret object +## This can be useful for auth tokens, etc.
+## If the secret values contain "{{", they'll need to be properly escaped so that they are not interpreted by Helm +## ref: https://helm.sh/docs/howto/charts_tips_and_tricks/#using-the-tpl-function +envRenderSecret: {} + +## The names of secrets in the same kubernetes namespace which contain values to be added to the environment +## Each entry should contain a name key, and can optionally specify whether the secret must be defined with an optional key. +## Name is templated. +envFromSecrets: [] +## - name: secret-name +## prefix: prefix +## optional: true + +## The names of configmaps in the same kubernetes namespace which contain values to be added to the environment +## Each entry should contain a name key, and can optionally specify whether the configmap must be defined with an optional key. +## Name is templated. +## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#configmapenvsource-v1-core +envFromConfigMaps: [] +## - name: configmap-name +## prefix: prefix +## optional: true + +# Inject Kubernetes services as environment variables. +# See https://kubernetes.io/docs/concepts/services-networking/connect-applications-service/#environment-variables +enableServiceLinks: true + +## Additional grafana server secret mounts +# Defines additional mounts with secrets. Secrets must be manually created in the namespace. +extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # secretName: grafana-secret-files + # readOnly: true + # subPath: "" + # + # for AWS EKS (cloudwatch) use the following (see also instruction in env: above) + # - name: aws-iam-token + # mountPath: /var/run/secrets/eks.amazonaws.com/serviceaccount + # readOnly: true + # projected: + # defaultMode: 420 + # sources: + # - serviceAccountToken: + # audience: sts.amazonaws.com + # expirationSeconds: 86400 + # path: token + # + # for CSI e.g.
Azure Key Vault use the following + # - name: secrets-store-inline + # mountPath: /run/secrets + # readOnly: true + # csi: + # driver: secrets-store.csi.k8s.io + # readOnly: true + # volumeAttributes: + # secretProviderClass: "akv-grafana-spc" + # nodePublishSecretRef: # Only required when using service principal mode + # name: grafana-akv-creds # Only required when using service principal mode + +## Additional grafana server volume mounts +# Defines additional volume mounts. +extraVolumeMounts: [] + # - name: extra-volume-0 + # mountPath: /mnt/volume0 + # readOnly: true + # - name: extra-volume-1 + # mountPath: /mnt/volume1 + # readOnly: true + # - name: grafana-secrets + # mountPath: /mnt/volume2 + +## Additional Grafana server volumes +extraVolumes: [] + # - name: extra-volume-0 + # existingClaim: volume-claim + # - name: extra-volume-1 + # hostPath: + # path: /usr/shared/ + # type: "" + # - name: grafana-secrets + # csi: + # driver: secrets-store.csi.k8s.io + # readOnly: true + # volumeAttributes: + # secretProviderClass: "grafana-env-spc" + +## Container Lifecycle Hooks. Execute a specific bash command or make an HTTP request +lifecycleHooks: {} + # postStart: + # exec: + # command: [] + +## Pass the plugins you want installed as a list. +## +plugins: [] + # - digrich-bubblechart-panel + # - grafana-clock-panel + ## You can also use other plugin download URL, as long as they are valid zip files, + ## and specify the name of the plugin after the semicolon. 
Like this: + # - https://grafana.com/api/plugins/marcusolsson-json-datasource/versions/1.3.2/download;marcusolsson-json-datasource + +## Configure grafana datasources +## ref: http://docs.grafana.org/administration/provisioning/#datasources +## +datasources: {} +# datasources.yaml: +# apiVersion: 1 +# datasources: +# - name: Prometheus +# type: prometheus +# url: http://prometheus-prometheus-server +# access: proxy +# isDefault: true +# - name: CloudWatch +# type: cloudwatch +# access: proxy +# uid: cloudwatch +# editable: false +# jsonData: +# authType: default +# defaultRegion: us-east-1 +# deleteDatasources: [] +# - name: Prometheus + +## Configure grafana alerting (can be templated) +## ref: http://docs.grafana.org/administration/provisioning/#alerting +## +alerting: {} + # rules.yaml: + # apiVersion: 1 + # groups: + # - orgId: 1 + # name: '{{ .Chart.Name }}_my_rule_group' + # folder: my_first_folder + # interval: 60s + # rules: + # - uid: my_id_1 + # title: my_first_rule + # condition: A + # data: + # - refId: A + # datasourceUid: '-100' + # model: + # conditions: + # - evaluator: + # params: + # - 3 + # type: gt + # operator: + # type: and + # query: + # params: + # - A + # reducer: + # type: last + # type: query + # datasource: + # type: __expr__ + # uid: '-100' + # expression: 1==0 + # intervalMs: 1000 + # maxDataPoints: 43200 + # refId: A + # type: math + # dashboardUid: my_dashboard + # panelId: 123 + # noDataState: Alerting + # for: 60s + # annotations: + # some_key: some_value + # labels: + # team: sre_team_1 + # contactpoints.yaml: + # secret: + # apiVersion: 1 + # contactPoints: + # - orgId: 1 + # name: cp_1 + # receivers: + # - uid: first_uid + # type: pagerduty + # settings: + # integrationKey: XXX + # severity: critical + # class: ping failure + # component: Grafana + # group: app-stack + # summary: | + # {{ `{{ include "default.message" . 
}}` }} + +## Configure notifiers +## ref: http://docs.grafana.org/administration/provisioning/#alert-notification-channels +## +notifiers: {} +# notifiers.yaml: +# notifiers: +# - name: email-notifier +# type: email +# uid: email1 +# # either: +# org_id: 1 +# # or +# org_name: Main Org. +# is_default: true +# settings: +# addresses: an_email_address@example.com +# delete_notifiers: + +## Configure grafana dashboard providers +## ref: http://docs.grafana.org/administration/provisioning/#dashboards +## +## `path` must be /var/lib/grafana/dashboards/ +## +dashboardProviders: {} +# dashboardproviders.yaml: +# apiVersion: 1 +# providers: +# - name: 'default' +# orgId: 1 +# folder: '' +# type: file +# disableDeletion: false +# editable: true +# options: +# path: /var/lib/grafana/dashboards/default + +## Configure grafana dashboard to import +## NOTE: To use dashboards you must also enable/configure dashboardProviders +## ref: https://grafana.com/dashboards +## +## dashboards per provider, use provider name as key. +## +dashboards: {} + # default: + # some-dashboard: + # json: | + # $RAW_JSON + # custom-dashboard: + # file: dashboards/custom-dashboard.json + # prometheus-stats: + # gnetId: 2 + # revision: 2 + # datasource: Prometheus + # local-dashboard: + # url: https://example.com/repository/test.json + # token: '' + # local-dashboard-base64: + # url: https://example.com/repository/test-b64.json + # token: '' + # b64content: true + # local-dashboard-gitlab: + # url: https://example.com/repository/test-gitlab.json + # gitlabToken: '' + # local-dashboard-bitbucket: + # url: https://example.com/repository/test-bitbucket.json + # bearerToken: '' + # local-dashboard-azure: + # url: https://example.com/repository/test-azure.json + # basic: '' + # acceptHeader: '*/*' + +## Reference to external ConfigMap per provider. Use provider name as key and ConfigMap name as value. +## A provider dashboards must be defined either by external ConfigMaps or in values.yaml, not in both. 
+## ConfigMap data example: +## +## data: +## example-dashboard.json: | +## RAW_JSON +## +dashboardsConfigMaps: {} +# default: "" + +## Grafana's primary configuration +## NOTE: values in map will be converted to ini format +## ref: http://docs.grafana.org/installation/configuration/ +## +grafana.ini: + paths: + data: /var/lib/grafana/ + logs: /var/log/grafana + plugins: /var/lib/grafana/plugins + provisioning: /etc/grafana/provisioning + analytics: + check_for_updates: true + log: + mode: console + grafana_net: + url: https://grafana.net + server: + domain: "{{ if (and .Values.ingress.enabled .Values.ingress.hosts) }}{{ .Values.ingress.hosts | first }}{{ else }}''{{ end }}" +## grafana Authentication can be enabled with the following values on grafana.ini + # server: + # The full public facing url you use in browser, used for redirects and emails + # root_url: + # https://grafana.com/docs/grafana/latest/auth/github/#enable-github-in-grafana + # auth.github: + # enabled: false + # allow_sign_up: false + # scopes: user:email,read:org + # auth_url: https://github.com/login/oauth/authorize + # token_url: https://github.com/login/oauth/access_token + # api_url: https://api.github.com/user + # team_ids: + # allowed_organizations: + # client_id: + # client_secret: +## LDAP Authentication can be enabled with the following values on grafana.ini +## NOTE: Grafana will fail to start if the value for ldap.toml is invalid + # auth.ldap: + # enabled: true + # allow_sign_up: true + # config_file: /etc/grafana/ldap.toml + +## Grafana's LDAP configuration +## Templated by the template in _helpers.tpl +## NOTE: To enable the grafana.ini must be configured with auth.ldap.enabled +## ref: http://docs.grafana.org/installation/configuration/#auth-ldap +## ref: http://docs.grafana.org/installation/ldap/#configuration +ldap: + enabled: false + # `existingSecret` is a reference to an existing secret containing the ldap configuration + # for Grafana in a key `ldap-toml`. 
+ existingSecret: "" + # `config` is the content of `ldap.toml` that will be stored in the created secret + config: "" + # config: |- + # verbose_logging = true + + # [[servers]] + # host = "my-ldap-server" + # port = 636 + # use_ssl = true + # start_tls = false + # ssl_skip_verify = false + # bind_dn = "uid=%s,ou=users,dc=myorg,dc=com" + +## Grafana's SMTP configuration +## NOTE: To enable, grafana.ini must be configured with smtp.enabled +## ref: http://docs.grafana.org/installation/configuration/#smtp +smtp: + # `existingSecret` is a reference to an existing secret containing the smtp configuration + # for Grafana. + existingSecret: "" + userKey: "user" + passwordKey: "password" + +## Sidecars that collect the configmaps with specified label and stores the included files them into the respective folders +## Requires at least Grafana 5 to work and can't be used together with parameters dashboardProviders, datasources and dashboards +sidecar: + image: + repository: rancher/mirrored-kiwigrid-k8s-sidecar + tag: 1.26.1 + sha: "" + imagePullPolicy: IfNotPresent + resources: {} +# limits: +# cpu: 100m +# memory: 100Mi +# requests: +# cpu: 50m +# memory: 50Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + # skipTlsVerify Set to true to skip tls verification for kube api calls + # skipTlsVerify: true + enableUniqueFilenames: false + readinessProbe: {} + livenessProbe: {} + # Log level default for all sidecars. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. Defaults to INFO + # logLevel: INFO + alerts: + enabled: false + # Additional environment variables for the alerts sidecar + env: {} + # Do not reprocess already processed unchanged resources on k8s API reconnect. + # ignoreAlreadyProcessed: true + # label that the configmaps with alert are marked with + label: grafana_alert + # value of label that the configmaps with alert are set to + labelValue: "" + # Log level. 
Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. + # logLevel: INFO + # If specified, the sidecar will search for alert config-maps inside this namespace. + # Otherwise the namespace in which the sidecar is running will be used. + # It's also possible to specify ALL to search in all namespaces + searchNamespace: null + # Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. + watchMethod: WATCH + # search in configmap, secret or both + resource: both + # watchServerTimeout: request to the server, asking it to cleanly close the connection after that. + # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S + # watchServerTimeout: 3600 + # + # watchClientTimeout: is a client-side timeout, configuring your local socket. + # If you have a network outage dropping all packets with no RST/FIN, + # this is how long your client waits before realizing & dropping the connection. + # defaults to 66sec (sic!) + # watchClientTimeout: 60 + # + # Endpoint to send request to reload alerts + reloadURL: "http://localhost:3000/api/admin/provisioning/alerting/reload" + # Absolute path to shell script to execute after a alert got reloaded + script: null + skipReload: false + # This is needed if skipReload is true, to load any alerts defined at startup time. + # Deploy the alert sidecar as an initContainer. + initAlerts: false + # Additional alert sidecar volume mounts + extraMounts: [] + # Sets the size limit of the alert sidecar emptyDir volume + sizeLimit: {} + dashboards: + enabled: false + # Additional environment variables for the dashboards sidecar + env: {} + # Do not reprocess already processed unchanged resources on k8s API reconnect. 
+ # ignoreAlreadyProcessed: true + SCProvider: true + # label that the configmaps with dashboards are marked with + label: grafana_dashboard + # value of label that the configmaps with dashboards are set to + labelValue: "" + # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. + # logLevel: INFO + # folder in the pod that should hold the collected dashboards (unless `defaultFolderName` is set) + folder: /tmp/dashboards + # The default folder name, it will create a subfolder under the `folder` and put dashboards in there instead + defaultFolderName: null + # Namespaces list. If specified, the sidecar will search for config-maps/secrets inside these namespaces. + # Otherwise the namespace in which the sidecar is running will be used. + # It's also possible to specify ALL to search in all namespaces. + searchNamespace: null + # Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. + watchMethod: WATCH + # search in configmap, secret or both + resource: both + # If specified, the sidecar will look for annotation with this name to create folder and put graph here. + # You can use this parameter together with `provider.foldersFromFilesStructure` to annotate configmaps and create folder structure. + folderAnnotation: null + # Endpoint to send request to reload dashboards + reloadURL: "http://localhost:3000/api/admin/provisioning/dashboards/reload" + # Absolute path to shell script to execute after a configmap got reloaded + script: null + skipReload: false + # watchServerTimeout: request to the server, asking it to cleanly close the connection after that. + # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S + # watchServerTimeout: 3600 + # + # watchClientTimeout: is a client-side timeout, configuring your local socket.
+ # If you have a network outage dropping all packets with no RST/FIN, + # this is how long your client waits before realizing & dropping the connection. + # defaults to 66sec (sic!) + # watchClientTimeout: 60 + # + # provider configuration that lets grafana manage the dashboards + provider: + # name of the provider, should be unique + name: sidecarProvider + # orgid as configured in grafana + orgid: 1 + # folder in which the dashboards should be imported in grafana + folder: '' + # type of the provider + type: file + # disableDelete to activate an import-only behaviour + disableDelete: false + # allow updating provisioned dashboards from the UI + allowUiUpdates: false + # allow Grafana to replicate dashboard structure from filesystem + foldersFromFilesStructure: false + # Additional dashboard sidecar volume mounts + extraMounts: [] + # Sets the size limit of the dashboard sidecar emptyDir volume + sizeLimit: {} + datasources: + enabled: false + # Additional environment variables for the datasources sidecar + env: {} + envValueFrom: {} + # Do not reprocess already processed unchanged resources on k8s API reconnect. + # ignoreAlreadyProcessed: true + # label that the configmaps with datasources are marked with + label: grafana_datasource + # value of label that the configmaps with datasources are set to + labelValue: "" + # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. + # logLevel: INFO + # If specified, the sidecar will search for datasource config-maps inside this namespace. + # Otherwise the namespace in which the sidecar is running will be used. + # It's also possible to specify ALL to search in all namespaces + searchNamespace: null + # Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds.
+ watchMethod: WATCH + # search in configmap, secret or both + resource: both + # watchServerTimeout: request to the server, asking it to cleanly close the connection after that. + # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S + # watchServerTimeout: 3600 + # + # watchClientTimeout: is a client-side timeout, configuring your local socket. + # If you have a network outage dropping all packets with no RST/FIN, + # this is how long your client waits before realizing & dropping the connection. + # defaults to 66sec (sic!) + # watchClientTimeout: 60 + # + # Endpoint to send request to reload datasources + reloadURL: "http://localhost:3000/api/admin/provisioning/datasources/reload" + # Absolute path to shell script to execute after a datasource got reloaded + script: null + skipReload: true + # This is needed if skipReload is true, to load any datasources defined at startup time. + # Deploy the datasources sidecar as an initContainer. + initDatasources: true + # Sets the size limit of the datasource sidecar emptyDir volume + sizeLimit: {} + plugins: + enabled: false + # Additional environment variables for the plugins sidecar + env: {} + # Do not reprocess already processed unchanged resources on k8s API reconnect. + # ignoreAlreadyProcessed: true + # label that the configmaps with plugins are marked with + label: grafana_plugin + # value of label that the configmaps with plugins are set to + labelValue: "" + # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. + # logLevel: INFO + # If specified, the sidecar will search for plugin config-maps inside this namespace. + # Otherwise the namespace in which the sidecar is running will be used. + # It's also possible to specify ALL to search in all namespaces + searchNamespace: null + # Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds. 
+ watchMethod: WATCH + # search in configmap, secret or both + resource: both + # watchServerTimeout: request to the server, asking it to cleanly close the connection after that. + # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S + # watchServerTimeout: 3600 + # + # watchClientTimeout: is a client-side timeout, configuring your local socket. + # If you have a network outage dropping all packets with no RST/FIN, + # this is how long your client waits before realizing & dropping the connection. + # defaults to 66sec (sic!) + # watchClientTimeout: 60 + # + # Endpoint to send request to reload plugins + reloadURL: "http://localhost:3000/api/admin/provisioning/plugins/reload" + # Absolute path to shell script to execute after a plugin got reloaded + script: null + skipReload: false + # Deploy the plugin sidecar as an initContainer in addition to a container. + # This is needed if skipReload is true, to load any plugins defined at startup time. + initPlugins: false + # Sets the size limit of the plugin sidecar emptyDir volume + sizeLimit: {} + notifiers: + enabled: false + # Additional environment variables for the notifiers sidecar + env: {} + # Do not reprocess already processed unchanged resources on k8s API reconnect. + # ignoreAlreadyProcessed: true + # label that the configmaps with notifiers are marked with + label: grafana_notifier + # value of label that the configmaps with notifiers are set to + labelValue: "" + # Log level. Can be one of: DEBUG, INFO, WARN, ERROR, CRITICAL. + # logLevel: INFO + # If specified, the sidecar will search for notifier config-maps inside this namespace. + # Otherwise the namespace in which the sidecar is running will be used. + # It's also possible to specify ALL to search in all namespaces + searchNamespace: null + # Method to use to detect ConfigMap changes. With WATCH the sidecar will do a WATCH requests, with SLEEP it will list all ConfigMaps, then sleep for 60 seconds.
+ watchMethod: WATCH + # search in configmap, secret or both + resource: both + # watchServerTimeout: request to the server, asking it to cleanly close the connection after that. + # defaults to 60sec; much higher values like 3600 seconds (1h) are feasible for non-Azure K8S + # watchServerTimeout: 3600 + # + # watchClientTimeout: is a client-side timeout, configuring your local socket. + # If you have a network outage dropping all packets with no RST/FIN, + # this is how long your client waits before realizing & dropping the connection. + # defaults to 66sec (sic!) + # watchClientTimeout: 60 + # + # Endpoint to send request to reload notifiers + reloadURL: "http://localhost:3000/api/admin/provisioning/notifications/reload" + # Absolute path to shell script to execute after a notifier got reloaded + script: null + skipReload: false + # Deploy the notifier sidecar as an initContainer in addition to a container. + # This is needed if skipReload is true, to load any notifiers defined at startup time. 
+ initNotifiers: false + # Sets the size limit of the notifier sidecar emptyDir volume + sizeLimit: {} + +## Override the deployment namespace +## +namespaceOverride: "" + +## Number of old ReplicaSets to retain +## +revisionHistoryLimit: 10 + +## Add a separate remote image renderer deployment/service +imageRenderer: + deploymentStrategy: {} + # Enable the image-renderer deployment & service + enabled: false + replicas: 1 + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 5 + targetCPU: "60" + targetMemory: "" + behavior: {} + image: + # image-renderer Image repository + repository: rancher/mirrored-grafana-grafana-image-renderer + # image-renderer Image tag + tag: 3.10.1 + # image-renderer Image sha (optional) + sha: "" + # image-renderer ImagePullPolicy + pullPolicy: Always + # extra environment variables + env: + HTTP_HOST: "0.0.0.0" + # RENDERING_ARGS: --no-sandbox,--disable-gpu,--window-size=1280x758 + # RENDERING_MODE: clustered + # IGNORE_HTTPS_ERRORS: true + + ## "valueFrom" environment variable references that will be added to deployment pods. Name is templated. + ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/#envvarsource-v1-core + ## Renders in container spec as: + ## env: + ## ...
+ ## - name: <key> + ## valueFrom: + ## <value rendered as YAML> + envValueFrom: {} + # ENV_NAME: + # configMapKeyRef: + # name: configmap-name + # key: value_key + + # image-renderer deployment serviceAccount + serviceAccountName: "" + # image-renderer deployment securityContext + securityContext: {} + # image-renderer deployment container securityContext + containerSecurityContext: + seccompProfile: + type: RuntimeDefault + capabilities: + drop: ['ALL'] + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + ## image-renderer pod annotation + podAnnotations: {} + # image-renderer deployment Host Aliases + hostAliases: [] + # image-renderer deployment priority class + priorityClassName: '' + service: + # Enable the image-renderer service + enabled: true + # image-renderer service port name + portName: 'http' + # image-renderer service port used by both service and deployment + port: 8081 + targetPort: 8081 + # Adds the appProtocol field to the image-renderer service. This allows it to work with istio protocol selection.
Ex: "http" or "tcp" + appProtocol: "" + serviceMonitor: + ## If true, a ServiceMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + enabled: false + path: /metrics + # namespace: monitoring (defaults to use the namespace this chart is deployed to) + labels: {} + interval: 1m + scheme: http + tlsConfig: {} + scrapeTimeout: 30s + relabelings: [] + # See: https://doc.crds.dev/github.com/prometheus-operator/kube-prometheus/monitoring.coreos.com/ServiceMonitor/v1@v0.11.0#spec-targetLabels + targetLabels: [] + # - targetLabel1 + # - targetLabel2 + # If https is enabled in Grafana, this needs to be set as 'https' to correctly configure the callback used in Grafana + grafanaProtocol: http + # In case a sub_path is used this needs to be added to the image renderer callback + grafanaSubPath: "" + # name of the image-renderer port on the pod + podPortName: http + # number of image-renderer replica sets to keep + revisionHistoryLimit: 10 + networkPolicy: + # Enable a NetworkPolicy to limit inbound traffic to only the created grafana pods + limitIngress: true + # Enable a NetworkPolicy to limit outbound traffic to only the created grafana pods + limitEgress: false + # Allow additional services to access image-renderer (eg. Prometheus operator when ServiceMonitor is enabled) + extraIngressSelectors: [] + resources: {} +# limits: +# cpu: 100m +# memory: 100Mi +# requests: +# cpu: 50m +# memory: 50Mi + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + # + nodeSelector: {} + + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + ## + affinity: {} + + ## Use an alternate scheduler, e.g. "stork". 
+ ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ + ## + # schedulerName: "default-scheduler" + +networkPolicy: + ## @param networkPolicy.enabled Enable creation of NetworkPolicy resources. Only Ingress traffic is filtered for now. + ## + enabled: false + ## @param networkPolicy.ingress When true enables the creation of + ## an ingress network policy + ## + ingress: true + ## @param networkPolicy.allowExternal Don't require client label for connections + ## The Policy model to apply. When set to false, only pods with the correct + ## client label will have network access to grafana port defined. + ## When true, grafana will accept connections from any source + ## (with the correct destination port). + ## + allowExternal: true + ## @param networkPolicy.explicitNamespacesSelector A Kubernetes LabelSelector to explicitly select namespaces from which traffic could be allowed + ## If explicitNamespacesSelector is missing or set to {}, only client Pods that are in the networkPolicy's namespace + ## and that match other criteria, the ones that have the good label, can reach the grafana. + ## But sometimes, we want the grafana to be accessible to clients from other namespaces, in this case, we can use this + ## LabelSelector to select these namespaces, note that the networkPolicy's namespace should also be explicitly added. + ## + ## Example: + ## explicitNamespacesSelector: + ## matchLabels: + ## role: frontend + ## matchExpressions: + ## - {key: role, operator: In, values: [frontend]} + ## + explicitNamespacesSelector: {} + ## + ## + ## + ## + ## + ## + egress: + ## @param networkPolicy.egress.enabled When enabled, an egress network policy will be + ## created allowing grafana to connect to external data sources from kubernetes cluster. + enabled: false + ## + ## @param networkPolicy.egress.blockDNSResolution When enabled, DNS resolution will be blocked + ## for all pods in the grafana namespace.
+ blockDNSResolution: false + ## + ## @param networkPolicy.egress.ports Add individual ports to be allowed by the egress + ports: [] + ## Add ports to the egress by specifying - port: + ## E.X. + ## - port: 80 + ## - port: 443 + ## + ## @param networkPolicy.egress.to Allow egress traffic to specific destinations + to: [] + ## Add destinations to the egress by specifying - ipBlock: + ## E.X. + ## to: + ## - namespaceSelector: + ## matchExpressions: + ## - {key: role, operator: In, values: [grafana]} + ## + ## + ## + ## + ## + +# Enable backward compatibility of kubernetes where version below 1.13 doesn't have the enableServiceLinks option +enableKubeBackwardCompatibility: false +useStatefulSet: false +# Create a dynamic manifests via values: +extraObjects: [] + # - apiVersion: "kubernetes-client.io/v1" + # kind: ExternalSecret + # metadata: + # name: grafana-secrets + # spec: + # backendType: gcpSecretsManager + # data: + # - key: grafana-admin-password + # name: adminPassword + +# assertNoLeakedSecrets is a helper function defined in _helpers.tpl that checks if secret +# values are not exposed in the rendered grafana.ini configmap. It is enabled by default. +# +# To pass values into grafana.ini without exposing them in a configmap, use variable expansion: +# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#variable-expansion +# +# Alternatively, if you wish to allow secret values to be exposed in the rendered grafana.ini configmap, +# you can disable this check by setting assertNoLeakedSecrets to false. +assertNoLeakedSecrets: true diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. 
+# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/Chart.yaml new file mode 100644 index 0000000000..acd648a5b4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. +kubeVersion: '>=1.26.0-0' +name: hardenedKubelet +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. 
A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. + +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. 
+ +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. `http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. 
Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out when rendering templates if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .Release.Namespace }}.svc:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. 
The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL 
security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has, it should match the number of pod the hostNetwork Deployment has. 
Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret is a secret key selector (name + key), not a bare secret name; the chart's service-account-token Secret stores the token under "token" */ -}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* add credentials to the dict created on the previous line; calling set on "authorization" again would replace it and drop "type" */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . 
-}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedKubelet/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/Chart.yaml new file mode 100644 index 0000000000..068932bacb --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: hardenedNodeExporter +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart into | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile`, and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; +the Linux tolerations below let workloads be scheduled onto those Linux nodes. +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- /* Mutates each entry of .Values.serviceMonitor.endpoints in place (proxyUrl, metricRelabelings, scheme, params, tlsConfig, auth) and renders the list as YAML. */ -}} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build type and credentials in one dict: a second set on "authorization" would replace the first dict and drop "type". */ -}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/hardenedNodeExporter/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/Chart.yaml new file mode 100644 index 0000000000..275e02e5dc --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: k3sServer +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to `false`, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g.
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components.
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run.
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of entries, each pairing a Semver `constraint` string (defined by https://github.com/Masterminds/semver) with values.yaml overrides under `values`. For each entry in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches the entry's semver constraint. On seeing a match, the default value for each values.yaml field overridden by that entry will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile` and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher +{{/* Emits "<registry>/" when .Values.global.cattle.systemDefaultRegistry is set, otherwise nothing, so it can be prepended to an image repository. */}} +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} +{{/* Node selector for Linux nodes: uses the beta.kubernetes.io/os label when the cluster version matches "<1.14-0", kubernetes.io/os otherwise. */}} +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General +{{/* Merges the "values" of every kubeVersionOverrides entry whose "constraint" matches the cluster version into .Values in place; later matches overwrite earlier ones. */}} +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} +{{/* Namespace used by the chart's resources: .Values.namespaceOverride when set, otherwise the release namespace. */}} +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} +{{/* Labels included by the client, proxy and ServiceMonitor label helpers: release, component and provider. */}} +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} +{{/* URL the clients register with: .Values.clients.proxyUrl when set, otherwise the in-cluster proxy Service URL built from .Values.proxy.port; rendering fails if neither is set. */}} +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build the authorization block as one dict: a second "set" on the same key would replace the first value and drop "type". */ -}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" .
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/k3sServer/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out when rendering templates if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it empty + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/.helmignore new file mode 100644 index 0000000000..f0c1319444 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/Chart.yaml new file mode 100644 index 0000000000..002a6a180d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/Chart.yaml @@ -0,0 +1,32 @@ +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-kube-state-metrics +apiVersion: v2 +appVersion: 2.10.1 +description: Install kube-state-metrics to 
generate and expose cluster-level metrics +home: https://github.com/kubernetes/kube-state-metrics/ +keywords: +- metric +- monitoring +- prometheus +- kubernetes +maintainers: +- email: tariq.ibrahim@mulesoft.com + name: tariq1890 +- email: manuel@rueg.eu + name: mrueg +- email: david@0xdc.me + name: dotdc +name: kube-state-metrics +sources: +- https://github.com/kubernetes/kube-state-metrics/ +type: application +version: 5.16.4 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/README.md new file mode 100644 index 0000000000..843be89e69 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/README.md @@ -0,0 +1,85 @@ +# kube-state-metrics Helm Chart + +Installs the [kube-state-metrics agent](https://github.com/kubernetes/kube-state-metrics). + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/kube-state-metrics [flags] +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/kube-state-metrics [flags] +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### Migrating from stable/kube-state-metrics and kubernetes/kube-state-metrics + +You can upgrade in-place: + +1. 
[get repository info](#get-repository-info) +1. [upgrade](#upgrading-chart) your existing release name using the new chart repository + +## Upgrading to v3.0.0 + +v3.0.0 includes kube-state-metrics v2.0; see the [changelog](https://github.com/kubernetes/kube-state-metrics/blob/release-2.0/CHANGELOG.md) for major changes on the application-side. + +The upgraded chart now has the following changes: + +* Dropped support for helm v2 (helm v3 or later is required) +* collectors key was renamed to resources +* namespace key was renamed to namespaces + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments: + +```console +helm show values prometheus-community/kube-state-metrics +``` + +### kube-rbac-proxy + +You can enable `kube-state-metrics` endpoint protection using `kube-rbac-proxy`. By setting `kubeRBACProxy.enabled: true`, this chart will deploy one RBAC proxy container per endpoint (metrics & telemetry). +To authorize access, authenticate your requests (via a `ServiceAccount` for example) with a `ClusterRole` attached such as: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics-read +rules: + - apiGroups: [ "" ] + resources: ["services/kube-state-metrics"] + verbs: + - get +``` + +See [kube-rbac-proxy examples](https://github.com/brancz/kube-rbac-proxy/tree/master/examples/resource-attributes) for more details. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/NOTES.txt b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/NOTES.txt new file mode 100644 index 0000000000..3589c24ec3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/NOTES.txt @@ -0,0 +1,23 @@ +kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects. +The exposed metrics can be found here: +https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics + +The metrics are exported on the HTTP endpoint /metrics on the listening port. +In your case, {{ template "kube-state-metrics.fullname" . }}.{{ template "kube-state-metrics.namespace" . }}.svc.cluster.local:{{ .Values.service.port }}/metrics + +They are served either as plaintext or protobuf depending on the Accept header. +They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint. + +{{- if .Values.kubeRBACProxy.enabled}} + +kube-rbac-proxy endpoint protection is enabled: +- Metrics endpoints are now HTTPS +- Ensure that the client authenticates the requests (e.g. via service account) with the following role permissions: +``` +rules: + - apiGroups: [ "" ] + resources: ["services/{{ template "kube-state-metrics.fullname" . 
}}"] + verbs: + - get +``` +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/_helpers.tpl new file mode 100644 index 0000000000..ed277fbb53 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/_helpers.tpl @@ -0,0 +1,196 @@ +# Rancher +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +{{- define "monitoring_registry" -}} + {{- $temp_registry := (include "system_default_registry" .) -}} + {{- if $temp_registry -}} + {{- trimSuffix "/" $temp_registry -}} + {{- else -}} + {{- .Values.global.imageRegistry -}} + {{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "kube-state-metrics.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "kube-state-metrics.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "kube-state-metrics.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "kube-state-metrics.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "kube-state-metrics.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "kube-state-metrics.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Generate basic labels +*/}} +{{- define "kube-state-metrics.labels" }} +helm.sh/chart: {{ template "kube-state-metrics.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" . }} +{{- include "kube-state-metrics.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "kube-state-metrics.selectorLabels" }} +{{- if .Values.selectorOverride }} +{{ toYaml .Values.selectorOverride }} +{{- else }} +app.kubernetes.io/name: {{ include "kube-state-metrics.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end -}} + +{{/* +Formats imagePullSecrets. Input is (dict "Values" .Values "imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "kube-state-metrics.imagePullSecrets" -}} +{{- range (concat .Values.global.imagePullSecrets .imagePullSecrets) }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +The image to use for kube-state-metrics +*/}} +{{- define "kube-state-metrics.image" -}} +{{- $registry := (include "monitoring_registry" .) 
}} +{{- if .Values.image.sha }} +{{- if $registry }} +{{- printf "%s/%s:%s@%s" $registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }} +{{- end }} +{{- else }} +{{- if $registry }} +{{- printf "%s/%s:%s" $registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +The image to use for kubeRBACProxy +*/}} +{{- define "kubeRBACProxy.image" -}} +{{- $registry := (include "monitoring_registry" .) }} +{{- if .Values.kubeRBACProxy.image.sha }} +{{- if $registry }} +{{- printf "%s/%s:%s@%s" $registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) .Values.kubeRBACProxy.image.sha }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.kubeRBACProxy.image.registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) .Values.kubeRBACProxy.image.sha }} +{{- end }} +{{- else }} +{{- if $registry }} +{{- printf "%s/%s:%s" $registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.kubeRBACProxy.image.registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml new file 
mode 100644 index 0000000000..025cd47a88 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.networkPolicy.enabled (eq .Values.networkPolicy.flavor "cilium") }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +spec: + endpointSelector: + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . | indent 6 }} + egress: + {{- if and .Values.networkPolicy.cilium .Values.networkPolicy.cilium.kubeApiServerSelector }} + {{ toYaml .Values.networkPolicy.cilium.kubeApiServerSelector | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} + ingress: + - toPorts: + - ports: + - port: {{ .Values.service.port | quote }} + protocol: TCP + {{- if .Values.selfMonitor.enabled }} + - port: {{ .Values.selfMonitor.telemetryPort | default 8081 | quote }} + protocol: TCP + {{ end }} +{{ end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/clusterrolebinding.yaml new file mode 100644 index 0000000000..cf9f628d04 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.rbac.create .Values.rbac.useClusterRole -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . 
}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole +{{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} +{{- else }} + name: {{ template "kube-state-metrics.fullname" . }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/crs-configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/crs-configmap.yaml new file mode 100644 index 0000000000..d38a75a51d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/crs-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.customResourceState.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-customresourcestate-config + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} +data: + config.yaml: | + {{- toYaml .Values.customResourceState.config | nindent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/deployment.yaml new file mode 100644 index 0000000000..03158eb948 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/deployment.yaml @@ -0,0 +1,314 @@ +apiVersion: apps/v1 +{{- if .Values.autosharding.enabled }} +kind: StatefulSet +{{- else }} +kind: Deployment +{{- end }} +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . 
}} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- if .Values.annotations }} + annotations: +{{ toYaml .Values.annotations | indent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . | indent 6 }} + replicas: {{ .Values.replicas }} + {{- if not .Values.autosharding.enabled }} + strategy: + type: {{ .Values.updateStrategy | default "RollingUpdate" }} + {{- end }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + {{- if .Values.autosharding.enabled }} + serviceName: {{ template "kube-state-metrics.fullname" . }} + volumeClaimTemplates: [] + {{- end }} + template: + metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 8 }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + hostNetwork: {{ .Values.hostNetwork }} + serviceAccountName: {{ template "kube-state-metrics.serviceAccountName" . }} + {{- if .Values.securityContext.enabled }} + securityContext: {{- omit .Values.securityContext "enabled" | toYaml | nindent 8 }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.initContainers }} + initContainers: + {{- toYaml . | nindent 6 }} + {{- end }} + containers: + {{- $servicePort := ternary 9090 (.Values.service.port | default 8080) .Values.kubeRBACProxy.enabled}} + {{- $telemetryPort := ternary 9091 (.Values.selfMonitor.telemetryPort | default 8081) .Values.kubeRBACProxy.enabled}} + - name: {{ template "kube-state-metrics.name" . 
}} + {{- if .Values.autosharding.enabled }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- end }} + args: + {{- if .Values.extraArgs }} + {{- .Values.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --port={{ $servicePort }} + {{- if .Values.collectors }} + - --resources={{ .Values.collectors | join "," }} + {{- end }} + {{- if .Values.metricLabelsAllowlist }} + - --metric-labels-allowlist={{ .Values.metricLabelsAllowlist | join "," }} + {{- end }} + {{- if .Values.metricAnnotationsAllowList }} + - --metric-annotations-allowlist={{ .Values.metricAnnotationsAllowList | join "," }} + {{- end }} + {{- if .Values.metricAllowlist }} + - --metric-allowlist={{ .Values.metricAllowlist | join "," }} + {{- end }} + {{- if .Values.metricDenylist }} + - --metric-denylist={{ .Values.metricDenylist | join "," }} + {{- end }} + {{- $namespaces := list }} + {{- if .Values.namespaces }} + {{- range $ns := join "," .Values.namespaces | split "," }} + {{- $namespaces = append $namespaces (tpl $ns $) }} + {{- end }} + {{- end }} + {{- if .Values.releaseNamespace }} + {{- $namespaces = append $namespaces ( include "kube-state-metrics.namespace" . 
) }} + {{- end }} + {{- if $namespaces }} + - --namespaces={{ $namespaces | mustUniq | join "," }} + {{- end }} + {{- if .Values.namespacesDenylist }} + - --namespaces-denylist={{ tpl (.Values.namespacesDenylist | join ",") $ }} + {{- end }} + {{- if .Values.autosharding.enabled }} + - --pod=$(POD_NAME) + - --pod-namespace=$(POD_NAMESPACE) + {{- end }} + {{- if .Values.kubeconfig.enabled }} + - --kubeconfig=/opt/k8s/.kube/config + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - --telemetry-host=127.0.0.1 + - --telemetry-port={{ $telemetryPort }} + {{- else }} + {{- if .Values.selfMonitor.telemetryHost }} + - --telemetry-host={{ .Values.selfMonitor.telemetryHost }} + {{- end }} + {{- if .Values.selfMonitor.telemetryPort }} + - --telemetry-port={{ $telemetryPort }} + {{- end }} + {{- end }} + {{- if .Values.customResourceState.enabled }} + - --custom-resource-state-config-file=/etc/customresourcestate/config.yaml + {{- end }} + {{- if or (.Values.kubeconfig.enabled) (.Values.customResourceState.enabled) (.Values.volumeMounts) }} + volumeMounts: + {{- if .Values.kubeconfig.enabled }} + - name: kubeconfig + mountPath: /opt/k8s/.kube/ + readOnly: true + {{- end }} + {{- if .Values.customResourceState.enabled }} + - name: customresourcestate-config + mountPath: /etc/customresourcestate + readOnly: true + {{- end }} + {{- if .Values.volumeMounts }} +{{ toYaml .Values.volumeMounts | indent 8 }} + {{- end }} + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + image: {{ include "kube-state-metrics.image" . 
}} + {{- if eq .Values.kubeRBACProxy.enabled false }} + ports: + - containerPort: {{ .Values.service.port | default 8080}} + name: "http" + {{- if .Values.selfMonitor.enabled }} + - containerPort: {{ $telemetryPort }} + name: "metrics" + {{- end }} + {{- end }} + livenessProbe: + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + httpGet: + {{- if .Values.hostNetwork }} + host: 127.0.0.1 + {{- end }} + httpHeaders: + {{- range $_, $header := .Values.livenessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: /healthz + port: {{ $servicePort }} + scheme: {{ upper .Values.livenessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + httpGet: + {{- if .Values.hostNetwork }} + host: 127.0.0.1 + {{- end }} + httpHeaders: + {{- range $_, $header := .Values.readinessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ $servicePort }} + scheme: {{ upper .Values.readinessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- if .Values.resources }} + resources: +{{ toYaml .Values.resources | indent 10 }} +{{- end }} +{{- if .Values.containerSecurityContext }} + securityContext: +{{ toYaml .Values.containerSecurityContext | indent 10 }} +{{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy-http + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- 
.Values.kubeRBACProxy.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --secure-listen-address=:{{ .Values.service.port | default 8080}} + - --upstream=http://127.0.0.1:{{ $servicePort }}/ + - --proxy-endpoints-port=8888 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + {{- with .Values.kubeRBACProxy.volumeMounts }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + image: {{ include "kubeRBACProxy.image" . }} + ports: + - containerPort: {{ .Values.service.port | default 8080}} + name: "http" + - containerPort: 8888 + name: "http-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8888 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: +{{ toYaml .Values.kubeRBACProxy.resources | indent 10 }} +{{- end }} +{{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: +{{ toYaml .Values.kubeRBACProxy.containerSecurityContext | indent 10 }} +{{- end }} + {{- if .Values.selfMonitor.enabled }} + - name: kube-rbac-proxy-telemetry + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- .Values.kubeRBACProxy.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --secure-listen-address=:{{ .Values.selfMonitor.telemetryPort | default 8081 }} + - --upstream=http://127.0.0.1:{{ $telemetryPort }}/ + - --proxy-endpoints-port=8889 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + {{- with .Values.kubeRBACProxy.volumeMounts }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + image: {{ include "kubeRBACProxy.image" . 
}} + ports: + - containerPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + name: "metrics" + - containerPort: 8889 + name: "metrics-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8889 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: +{{ toYaml .Values.kubeRBACProxy.resources | indent 10 }} +{{- end }} +{{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: +{{ toYaml .Values.kubeRBACProxy.containerSecurityContext | indent 10 }} +{{- end }} + {{- end }} + {{- end }} + {{- with .Values.containers }} + {{- toYaml . | nindent 6 }} + {{- end }} +{{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-state-metrics.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }} + {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} + {{- if .Values.nodeSelector }} +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} + {{- if .Values.tolerations }} +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + {{- if .Values.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.topologySpreadConstraints | indent 8 }} + {{- end }} + {{- if or (.Values.kubeconfig.enabled) (.Values.customResourceState.enabled) (.Values.volumes) (.Values.kubeRBACProxy.enabled) }} + volumes: + {{- if .Values.kubeconfig.enabled}} + - name: kubeconfig + secret: + secretName: {{ template "kube-state-metrics.fullname" . }}-kubeconfig + {{- end }} + {{- if .Values.kubeRBACProxy.enabled}} + - name: kube-rbac-proxy-config + configMap: + name: {{ template "kube-state-metrics.fullname" . 
}}-rbac-config + {{- end }} + {{- if .Values.customResourceState.enabled}} + - name: customresourcestate-config + configMap: + name: {{ template "kube-state-metrics.fullname" . }}-customresourcestate-config + {{- end }} + {{- if .Values.volumes }} +{{ toYaml .Values.volumes | indent 8 }} + {{- end }} + {{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/extra-manifests.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/extra-manifests.yaml new file mode 100644 index 0000000000..567f7bf329 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl (toYaml .) $ }} +{{ end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/kubeconfig-secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/kubeconfig-secret.yaml new file mode 100644 index 0000000000..6af0084502 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/kubeconfig-secret.yaml @@ -0,0 +1,12 @@ +{{- if .Values.kubeconfig.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-kubeconfig + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} +type: Opaque +data: + config: '{{ .Values.kubeconfig.secret }}' +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/networkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/networkpolicy.yaml new file mode 100644 index 0000000000..309b38ec54 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/networkpolicy.yaml @@ -0,0 +1,43 @@ +{{- if and .Values.networkPolicy.enabled (eq .Values.networkPolicy.flavor "kubernetes") }} +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +spec: + {{- if .Values.networkPolicy.egress }} + ## Deny all egress by default + egress: + {{- toYaml .Values.networkPolicy.egress | nindent 4 }} + {{- end }} + ingress: + {{- if .Values.networkPolicy.ingress }} + {{- toYaml .Values.networkPolicy.ingress | nindent 4 }} + {{- else }} + ## Allow ingress on default ports by default + - ports: + - port: {{ .Values.service.port | default 8080 }} + protocol: TCP + {{- if .Values.selfMonitor.enabled }} + {{- $telemetryPort := ternary 9091 (.Values.selfMonitor.telemetryPort | default 8081) .Values.kubeRBACProxy.enabled}} + - port: {{ $telemetryPort }} + protocol: TCP + {{- end }} + {{- end }} + podSelector: + {{- if .Values.networkPolicy.podSelector }} + {{- toYaml .Values.networkPolicy.podSelector | nindent 4 }} + {{- else }} + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . 
| indent 6 }} + {{- end }} + policyTypes: + - Ingress + - Egress +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/pdb.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/pdb.yaml new file mode 100644 index 0000000000..3771b511de --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/pdb.yaml @@ -0,0 +1,18 @@ +{{- if .Values.podDisruptionBudget -}} +{{ if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} +{{ toYaml .Values.podDisruptionBudget | indent 2 }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/podsecuritypolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/podsecuritypolicy.yaml new file mode 100644 index 0000000000..d9d944d740 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/podsecuritypolicy.yaml @@ -0,0 +1,39 @@ +{{- if and .Values.rbac.create (and (or .Values.global.cattle.psp.enabled .Values.podSecurityPolicy.enabled) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy")) }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} +{{- if .Values.podSecurityPolicy.annotations }} + annotations: +{{ toYaml .Values.podSecurityPolicy.annotations | indent 4 }} +{{- end }} +spec: + privileged: false + volumes: + - 'secret' +{{- if .Values.podSecurityPolicy.additionalVolumes }} +{{ toYaml .Values.podSecurityPolicy.additionalVolumes | indent 4 }} +{{- end }} + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrole.yaml new file mode 100644 index 0000000000..c69e01a716 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrole.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.rbac.create (and (or .Values.global.cattle.psp.enabled .Values.podSecurityPolicy.enabled) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy")) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: psp-{{ template "kube-state-metrics.fullname" . }} +rules: +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} +- apiGroups: ['policy'] +{{- else }} +- apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-state-metrics.fullname" . 
}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml new file mode 100644 index 0000000000..df81c49028 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.rbac.create (and (or .Values.global.cattle.psp.enabled .Values.podSecurityPolicy.enabled) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy")) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: psp-{{ template "kube-state-metrics.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psp-{{ template "kube-state-metrics.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rbac-configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rbac-configmap.yaml new file mode 100644 index 0000000000..671dc9d660 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rbac-configmap.yaml @@ -0,0 +1,22 @@ +{{- if .Values.kubeRBACProxy.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-rbac-config + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} +data: + config-file.yaml: |+ + authorization: + resourceAttributes: + namespace: {{ template "kube-state-metrics.namespace" . }} + apiVersion: v1 + resource: services + subresource: {{ template "kube-state-metrics.fullname" . }} + name: {{ template "kube-state-metrics.fullname" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/role.yaml new file mode 100644 index 0000000000..0170878376 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/role.yaml @@ -0,0 +1,215 @@ +{{- if not (kindIs "slice" .Values.collectors) }} +{{- fail "Collectors need to be a List since kube-state-metrics chart 3.2.2. Please check README for more information."}} +{{- end }} +{{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} +{{- range (ternary (join "," .Values.namespaces | split "," ) (list "") (eq $.Values.rbac.useClusterRole false)) }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +{{- if eq $.Values.rbac.useClusterRole false }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: + labels: + {{- include "kube-state-metrics.labels" $ | indent 4 }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- if eq $.Values.rbac.useClusterRole false }} + namespace: {{ . 
}} +{{- end }} +rules: +{{ if has "certificatesigningrequests" $.Values.collectors }} +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] +{{ end -}} +{{ if has "configmaps" $.Values.collectors }} +- apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] +{{ end -}} +{{ if has "cronjobs" $.Values.collectors }} +- apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] +{{ end -}} +{{ if has "daemonsets" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - daemonsets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "deployments" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] +{{ end -}} +{{ if has "endpoints" $.Values.collectors }} +- apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] +{{ end -}} +{{ if has "endpointslices" $.Values.collectors }} +- apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: ["list", "watch"] +{{ end -}} +{{ if has "horizontalpodautoscalers" $.Values.collectors }} +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] +{{ end -}} +{{ if has "ingresses" $.Values.collectors }} +- apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] +{{ end -}} +{{ if has "jobs" $.Values.collectors }} +- apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] +{{ end -}} +{{ if has "leases" $.Values.collectors }} +- apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] +{{ end -}} +{{ if has "limitranges" $.Values.collectors }} +- apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] +{{ end -}} +{{ if has "mutatingwebhookconfigurations" $.Values.collectors }} +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] +{{ end 
-}} +{{ if has "namespaces" $.Values.collectors }} +- apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] +{{ end -}} +{{ if has "networkpolicies" $.Values.collectors }} +- apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] +{{ end -}} +{{ if has "nodes" $.Values.collectors }} +- apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] +{{ end -}} +{{ if has "persistentvolumeclaims" $.Values.collectors }} +- apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] +{{ end -}} +{{ if has "persistentvolumes" $.Values.collectors }} +- apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] +{{ end -}} +{{ if has "poddisruptionbudgets" $.Values.collectors }} +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "pods" $.Values.collectors }} +- apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] +{{ end -}} +{{ if has "replicasets" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "replicationcontrollers" $.Values.collectors }} +- apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] +{{ end -}} +{{ if has "resourcequotas" $.Values.collectors }} +- apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] +{{ end -}} +{{ if has "secrets" $.Values.collectors }} +- apiGroups: [""] + resources: + - secrets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "services" $.Values.collectors }} +- apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] +{{ end -}} +{{ if has "statefulsets" $.Values.collectors }} +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "storageclasses" $.Values.collectors }} +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] +{{ 
end -}} +{{ if has "validatingwebhookconfigurations" $.Values.collectors }} +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + verbs: ["list", "watch"] +{{ end -}} +{{ if has "volumeattachments" $.Values.collectors }} +- apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] +{{ end -}} +{{- if $.Values.kubeRBACProxy.enabled }} +- apiGroups: ["authentication.k8s.io"] + resources: + - tokenreviews + verbs: ["create"] +- apiGroups: ["authorization.k8s.io"] + resources: + - subjectaccessreviews + verbs: ["create"] +{{- end }} +{{- if $.Values.customResourceState.enabled }} +- apiGroups: ["apiextensions.k8s.io"] + resources: + - customresourcedefinitions + verbs: ["list", "watch"] +{{- end }} +{{ if $.Values.rbac.extraRules }} +{{ toYaml $.Values.rbac.extraRules }} +{{ end }} +{{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rolebinding.yaml new file mode 100644 index 0000000000..330651b73f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/rolebinding.yaml @@ -0,0 +1,24 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.rbac.useClusterRole false) -}} +{{- range (join "," $.Values.namespaces) | split "," }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" $ | indent 4 }} + name: {{ template "kube-state-metrics.fullname" $ }} + namespace: {{ . 
}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role +{{- if (not $.Values.rbac.useExistingRole) }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- else }} + name: {{ $.Values.rbac.useExistingRole }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" $ }} + namespace: {{ template "kube-state-metrics.namespace" $ }} +{{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/service.yaml new file mode 100644 index 0000000000..6c486a662a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/service.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} + annotations: + {{- if .Values.prometheusScrape }} + prometheus.io/scrape: '{{ .Values.prometheusScrape }}' + {{- end }} + {{- if .Values.service.annotations }} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: "{{ .Values.service.type }}" + ports: + - name: "http" + protocol: TCP + port: {{ .Values.service.port | default 8080}} + {{- if .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.port | default 8080}} + {{ if .Values.selfMonitor.enabled }} + - name: "metrics" + protocol: TCP + port: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + targetPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + {{- if .Values.selfMonitor.telemetryNodePort }} + nodePort: {{ .Values.selfMonitor.telemetryNodePort }} + {{- end }} + {{ end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if .Values.autosharding.enabled }} + clusterIP: None +{{- else if .Values.service.clusterIP }} + clusterIP: "{{ .Values.service.clusterIP }}" +{{- end }} + selector: + {{- include "kube-state-metrics.selectorLabels" . | indent 4 }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/serviceaccount.yaml new file mode 100644 index 0000000000..38a93b31d1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- if .Values.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.serviceAccount.annotations | indent 4 }} +{{- end }} +{{- if or .Values.serviceAccount.imagePullSecrets .Values.global.imagePullSecrets }} +imagePullSecrets: + {{- include "kube-state-metrics.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.serviceAccount.imagePullSecrets) | indent 2 }} +{{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/servicemonitor.yaml new file mode 100644 index 0000000000..01ec44e067 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/servicemonitor.yaml @@ -0,0 +1,126 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- with .Values.prometheus.monitor.additionalLabels }} + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} + {{- with .Values.prometheus.monitor.annotations }} + annotations: + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + {{- with .Values.prometheus.monitor.targetLabels }} + targetLabels: + {{- toYaml . | trim | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.monitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . 
| trim | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | indent 2 }} + {{- if .Values.prometheus.monitor.namespaceSelector }} + namespaceSelector: + matchNames: + {{- with .Values.prometheus.monitor.namespaceSelector }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- end }} + selector: + matchLabels: + {{- with .Values.prometheus.monitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "kube-state-metrics.selectorLabels" . | indent 6 }} + {{- end }} + endpoints: + - port: http + {{- if .Values.prometheus.monitor.interval }} + interval: {{ .Values.prometheus.monitor.interval }} + {{- end }} + {{- if .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} + {{- end }} + {{- if .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}} + {{- end }} + {{- if .Values.prometheus.monitor.enableHttp2 }} + enableHttp2: {{ .Values.prometheus.monitor.enableHttp2}} + {{- end }} + {{- if .Values.prometheus.monitor.honorLabels }} + honorLabels: true + {{- end }} + metricRelabelings: + {{- if .Values.prometheus.monitor.metricRelabelings }} + {{- toYaml .Values.prometheus.monitor.metricRelabelings | nindent 6 }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName }} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} + {{- if .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.scheme }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- end }} + {{- if .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml .Values.prometheus.monitor.tlsConfig 
| nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.monitor.bearerTokenFile }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.selfMonitor.enabled }} + - port: metrics + {{- if .Values.prometheus.monitor.interval }} + interval: {{ .Values.prometheus.monitor.interval }} + {{- end }} + {{- if .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} + {{- end }} + {{- if .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}} + {{- end }} + {{- if .Values.prometheus.monitor.enableHttp2 }} + enableHttp2: {{ .Values.prometheus.monitor.enableHttp2}} + {{- end }} + {{- if .Values.prometheus.monitor.honorLabels }} + honorLabels: true + {{- end }} + {{- if .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.scheme }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- end }} + {{- if .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml .Values.prometheus.monitor.tlsConfig | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.monitor.bearerTokenFile }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-role.yaml new file mode 100644 index 0000000000..489de147c1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-role.yaml @@ -0,0 +1,26 @@ +{{- if and .Values.autosharding.enabled .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - apps + resourceNames: + - {{ template "kube-state-metrics.fullname" . }} + resources: + - statefulsets + verbs: + - get + - list + - watch +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml new file mode 100644 index 0000000000..73b37a4f64 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.autosharding.enabled .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . 
}} +subjects: + - kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000000..f46305b517 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml @@ -0,0 +1,44 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: {{ template "kube-state-metrics.name" . }} + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{ toYaml .Values.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{ toYaml .Values.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + {{- if .Values.autosharding.enabled }} + kind: StatefulSet + {{- else }} + kind: Deployment + {{- end }} + name: {{ template "kube-state-metrics.fullname" . 
}} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/values.yaml new file mode 100644 index 0000000000..bc8ee28fda --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kube-state-metrics/values.yaml @@ -0,0 +1,491 @@ +# Default values for kube-state-metrics. +prometheusScrape: true +image: + registry: docker.io + repository: rancher/mirrored-kube-state-metrics-kube-state-metrics + tag: v2.10.1 + sha: "" + pullPolicy: IfNotPresent + +imagePullSecrets: [] +# - name: "image-pull-secret" + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + # + # Allow parent charts to override registry hostname + imageRegistry: "" + +# If set to true, this will deploy kube-state-metrics as a StatefulSet and the data +# will be automatically sharded across <.Values.replicas> pods using the built-in +# autodiscovery feature: https://github.com/kubernetes/kube-state-metrics#automated-sharding +# This is an experimental feature and there are no stability guarantees. +autosharding: + enabled: false + +replicas: 1 + +# Change the deployment strategy when autosharding is disabled. +# ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy +# The default is "RollingUpdate" as per Kubernetes defaults. 
+# During a release, 'RollingUpdate' can lead to two running instances for a short period of time while 'Recreate' can create a small gap in data. +# updateStrategy: Recreate + +# Number of old history to retain to allow rollback +# Default Kubernetes value is set to 10 +revisionHistoryLimit: 10 + +# List of additional cli arguments to configure kube-state-metrics +# for example: --enable-gzip-encoding, --log-file, etc. +# all the possible args can be found here: https://github.com/kubernetes/kube-state-metrics/blob/master/docs/cli-arguments.md +extraArgs: [] + +service: + port: 8080 + # Default to clusterIP for backward compatibility + type: ClusterIP + nodePort: 0 + loadBalancerIP: "" + # Only allow access to the loadBalancerIP from these IPs + loadBalancerSourceRanges: [] + clusterIP: "" + annotations: {} + +## Additional labels to add to all resources +customLabels: {} + # app: kube-state-metrics + +## Override selector labels +selectorOverride: {} + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +hostNetwork: false + +rbac: + # If true, create & use RBAC resources + create: true + + # Set to a rolename to use existing role - skipping role creating - but still doing serviceaccount and rolebinding to it, rolename set here. + # useExistingRole: your-existing-role + + # If set to false - Run without cluster-admin privs needed - ONLY works if namespace is also set (if useExistingRole is set this name is used as ClusterRole or Role to bind to) + useClusterRole: true + + # Add permissions for CustomResources' apiGroups in Role/ClusterRole. Should be used in conjunction with Custom Resource State Metrics configuration + # Example: + # - apiGroups: ["monitoring.coreos.com"] + # resources: ["prometheuses"] + # verbs: ["list", "watch"] + extraRules: [] + +# Configure kube-rbac-proxy. 
When enabled, creates one kube-rbac-proxy container per exposed HTTP endpoint (metrics and telemetry if enabled). +# The requests are served through the same service but requests are then HTTPS. +kubeRBACProxy: + enabled: false + image: + repository: rancher/mirrored-kube-rbac-proxy + tag: v0.14.0 + sha: "" + pullPolicy: IfNotPresent + + # List of additional cli arguments to configure kube-rbac-proxy + # for example: --tls-cipher-suites, --log-file, etc. + # all the possible args can be found here: https://github.com/brancz/kube-rbac-proxy#usage + extraArgs: [] + + ## Specify security settings for a Container + ## Allows overrides and additional options compared to (Pod) securityContext + ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + containerSecurityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + + ## volumeMounts enables mounting custom volumes in rbac-proxy containers + ## Useful for TLS certificates and keys + volumeMounts: [] + # - mountPath: /etc/tls + # name: kube-rbac-proxy-tls + # readOnly: true + +serviceAccount: + # Specifies whether a ServiceAccount should be created, require rbac true + create: true + # The name of the ServiceAccount to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: + # Reference to one or more secrets to be used when pulling images + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + imagePullSecrets: [] + # ServiceAccount annotations. + # Use case: AWS EKS IAM roles for service accounts + # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html + annotations: {} + +prometheus: + monitor: + enabled: false + annotations: {} + additionalLabels: {} + namespace: "" + namespaceSelector: [] + jobLabel: "" + targetLabels: [] + podTargetLabels: [] + interval: "" + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + scrapeTimeout: "" + proxyUrl: "" + ## Whether to enable HTTP2 for servicemonitor + # enableHttp2: false + selectorOverride: {} + honorLabels: false + metricRelabelings: [] + relabelings: [] + scheme: "" + ## File to read bearer token for scraping targets + bearerTokenFile: "" + ## Secret to mount to read bearer token for scraping targets. 
The secret needs + ## to be in the same namespace as the service monitor and accessible by the + ## Prometheus Operator + bearerTokenSecret: {} + # name: secret-name + # key: key-name + tlsConfig: {} + +## Specify if a Pod Security Policy for kube-state-metrics must be created +## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ +## +podSecurityPolicy: + annotations: {} + ## Specify pod annotations + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl + ## + # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' + # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' + + additionalVolumes: [] + +## Configure network policy for kube-state-metrics +networkPolicy: + enabled: false + # networkPolicy.flavor -- Flavor of the network policy to use. 
+ # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + ## Configure the cilium network policy kube-apiserver selector + # cilium: + # kubeApiServerSelector: + # - toEntities: + # - kube-apiserver + + # egress: + # - {} + # ingress: + # - {} + # podSelector: + # matchLabels: + # app.kubernetes.io/name: kube-state-metrics + +securityContext: + enabled: true + runAsGroup: 65534 + runAsUser: 65534 + fsGroup: 65534 + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + +## Specify security settings for a Container +## Allows overrides and additional options compared to (Pod) securityContext +## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container +containerSecurityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + +## Node labels for pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +nodeSelector: {} + +## Affinity settings for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +affinity: {} + +## Tolerations for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] + +## Topology spread constraints for pod assignment +## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +topologySpreadConstraints: [] + +# Annotations to be added to the deployment/statefulset +annotations: {} + +# Annotations to be added to the pod +podAnnotations: {} + +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +# Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +podDisruptionBudget: {} + +# Comma-separated list of metrics to be exposed. +# This list comprises of exact metric names and/or regex patterns. +# The allowlist and denylist are mutually exclusive. 
+metricAllowlist: [] + +# Comma-separated list of metrics not to be enabled. +# This list comprises of exact metric names and/or regex patterns. +# The allowlist and denylist are mutually exclusive. +metricDenylist: [] + +# Comma-separated list of additional Kubernetes label keys that will be used in the resource's +# labels metric. By default the metric contains only name and namespace labels. +# To include additional labels, provide a list of resource names in their plural form and Kubernetes +# label keys you would like to allow for them (Example: '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...'). +# A single '*' can be provided per resource instead to allow any labels, but that has +# severe performance implications (Example: '=pods=[*]'). +metricLabelsAllowlist: [] + # - namespaces=[k8s-label-1,k8s-label-n] + +# Comma-separated list of Kubernetes annotation keys that will be used in the resource's +# labels metric. By default the metric contains only name and namespace labels. +# To include additional annotations provide a list of resource names in their plural form and Kubernetes +# annotation keys you would like to allow for them (Example: '=namespaces=[kubernetes.io/team,...],pods=[kubernetes.io/team],...'). +# A single '*' can be provided per resource instead to allow any annotations, but that has +# severe performance implications (Example: '=pods=[*]'). +metricAnnotationsAllowList: [] + # - pods=[k8s-annotation-1,k8s-annotation-n] + +# Available collectors for kube-state-metrics. +# By default, all available resources are enabled, comment out to disable. 
+collectors: + - certificatesigningrequests + - configmaps + - cronjobs + - daemonsets + - deployments + - endpoints + - horizontalpodautoscalers + - ingresses + - jobs + - leases + - limitranges + - mutatingwebhookconfigurations + - namespaces + - networkpolicies + - nodes + - persistentvolumeclaims + - persistentvolumes + - poddisruptionbudgets + - pods + - replicasets + - replicationcontrollers + - resourcequotas + - secrets + - services + - statefulsets + - storageclasses + - validatingwebhookconfigurations + - volumeattachments + +# Enabling kubeconfig will pass the --kubeconfig argument to the container +kubeconfig: + enabled: false + # base64 encoded kube-config file + secret: + +# Enabling support for customResourceState, will create a configMap including your config that will be read from kube-state-metrics +customResourceState: + enabled: false + # Add (Cluster)Role permissions to list/watch the customResources defined in the config to rbac.extraRules + config: {} + +# Enable only the release namespace for collecting resources. By default all namespaces are collected. +# If releaseNamespace and namespaces are both set a merged list will be collected. +releaseNamespace: false + +# Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. +namespaces: "" + +# Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, +# only namespaces that are excluded in namespaces-denylist will be used. +namespacesDenylist: "" + +## Override the deployment namespace +## +namespaceOverride: "" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + +## Provide a k8s version to define apiGroups for podSecurityPolicy Cluster Role. +## For example: kubeTargetVersionOverride: 1.14.9 +## +kubeTargetVersionOverride: "" + +# Enable self metrics configuration for service and Service Monitor +# Default values for telemetry configuration can be overridden +# If you set telemetryNodePort, you must also set service.type to NodePort +selfMonitor: + enabled: false + # telemetryHost: 0.0.0.0 + # telemetryPort: 8081 + # telemetryNodePort: 0 + +# Enable vertical pod autoscaler support for kube-state-metrics +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: [] + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto + +# volumeMounts are used to add custom volume mounts to deployment. 
+# See example below +volumeMounts: [] +# - mountPath: /etc/config +# name: config-volume + +# volumes are used to add custom volumes to deployment +# See example below +volumes: [] +# - configMap: +# name: cm-for-volume +# name: config-volume + +# Extra manifests to deploy as an array +extraManifests: [] + # - apiVersion: v1 + # kind: ConfigMap + # metadata: + # labels: + # name: prometheus-extra + # data: + # extra-data: "value" + +## Containers allows injecting additional containers. +containers: [] + # - name: crd-init + # image: kiwigrid/k8s-sidecar:latest + +## InitContainers allows injecting additional initContainers. +initContainers: [] + # - name: crd-sidecar + # image: kiwigrid/k8s-sidecar:latest + +## Liveness probe +## +livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + +## Readiness probe +## +readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/Chart.yaml new file mode 100644 index 0000000000..ad9fba247a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. +kubeVersion: '>=1.26.0-0' +name: kubeAdmControllerManager +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. 
A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make requests to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. + +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, all previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. 
+ +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. `http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. 
Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .Release.Namespace }}.svc:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. 
The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL 
security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has, it should match the number of pod the hostNetwork Deployment has. 
Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of entries, each with a Semver `constraint` string (defined by https://github.com/Masterminds/semver) and a map of values.yaml overrides under `values`. For each entry in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches the entry's constraint. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for 
client" (or .Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret is a secret key selector (name + key), not a bare secret name; "token" is the key of a service-account-token Secret */ -}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* add credentials to the map created above; a second set on "authorization" would replace it and drop "type" */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . 
-}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmControllerManager/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/Chart.yaml new file mode 100644 index 0000000000..d144d3ee2a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: kubeAdmEtcd +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile`, and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* Add credentials to the dict created on the previous line; a second set on the "authorization" key would replace that dict and drop "type". */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmEtcd/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will fail at render time if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it empty + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/Chart.yaml new file mode 100644 index 0000000000..a1222c4feb --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: kubeAdmProxy +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to `false`, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://{HOST_IP}:{metricsPort}/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out when rendering templates if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if `clients.https.enabled` and `clients.https.useServiceAccountCredentials` are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of overrides, each pairing a Semver `constraint` string (defined by https://github.com/Masterminds/semver) with a map of values.yaml overrides under `values`. For each entry in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches the entry's semver constraint. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.{cert,key,caCert}File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; +add the Linux tolerations below so workloads can still be scheduled onto those Linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- /* Fail rendering early when neither an explicit proxy URL nor a proxy port is configured */ -}}{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- /* Each endpoint map from values is decorated in place; the mutated list is rendered at the end */ -}}{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret is a SecretKeySelector (name + key), not a bare Secret name; assumes the service-account-token Secret stores the token under "token" */ -}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* Add credentials to the map created on the previous line instead of replacing it, so that "type" is kept */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmProxy/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/Chart.yaml new file mode 100644 index 0000000000..78a44159a9 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: kubeAdmScheduler +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment runs; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Intended for use when `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }}{{- /* No user-supplied authorization map: create it with "type", then add "credentials" to that same map so the second set does not replace the first. */}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/kubeAdmScheduler/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/.helmignore new file mode 100644 index 0000000000..f0c1319444 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/Chart.yaml new file mode 100644 index 0000000000..d067725a17 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/Chart.yaml @@ -0,0 +1,28 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-prometheus-adapter +apiVersion: v1 +appVersion: v0.10.0 +description: A Helm chart for k8s prometheus adapter +home: https://github.com/kubernetes-sigs/prometheus-adapter +keywords: +- hpa +- metrics +- prometheus +- adapter +kubeVersion: 
'>=1.26.0-0' +maintainers: +- email: mattias.gees@jetstack.io + name: mattiasgees +- name: steven-sheehy +- email: hfernandez@mesosphere.com + name: hectorj2f +name: prometheus-adapter +sources: +- https://github.com/kubernetes/charts +- https://github.com/kubernetes-sigs/prometheus-adapter +version: 4.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/README.md new file mode 100644 index 0000000000..d77bb0c920 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/README.md @@ -0,0 +1,160 @@ +# Prometheus Adapter + +Installs the [Prometheus Adapter](https://github.com/kubernetes-sigs/prometheus-adapter) for the Custom Metrics API. Custom metrics are used in Kubernetes by [Horizontal Pod Autoscalers](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) to scale workloads based upon your own metric pulled from an external metrics provider like Prometheus. This chart complements the [metrics-server](https://github.com/helm/charts/tree/master/stable/metrics-server) chart that provides resource only metrics. + +## Prerequisites + +Kubernetes 1.14+ + +## Get Helm Repositories Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Helm Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-adapter +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Helm Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. 
+ +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Helm Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### To 4.2.0 + +Readiness and liveness probes are now fully configurable through values `readinessProbe` and `livenessProbe`. The previous values have been kept as defaults. + +### To 4.0.0 + +Previously, security context of the container was set directly in the deployment template. This release makes it configurable through the new configuration variable `securityContext` whilst keeping the previously set values as defaults. Furthermore, previous variable `runAsUser` is now set in `securityContext` and is not used any longer. Please, use `securityContext.runAsUser` instead. In the same security context, `seccompProfile` has been enabled and set to type `RuntimeDefault`. + +### To 3.0.0 + +Due to a change in deployment labels, the upgrade requires `helm upgrade --force` in order to re-create the deployment. + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-adapter +``` + +### Prometheus Service Endpoint + +To use the chart, ensure the `prometheus.url` and `prometheus.port` are configured with the correct Prometheus service endpoint. If Prometheus is exposed under HTTPS the host's CA Bundle must be exposed to the container using `extraVolumes` and `extraVolumeMounts`. + +### Adapter Rules + +Additionally, the chart comes with a set of default rules out of the box but they may pull in too many metrics or not map them correctly for your needs. 
Therefore, it is recommended to populate `rules.custom` with a list of rules (see the [config document](https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md) for the proper format). + +### Horizontal Pod Autoscaler Metrics + +Finally, to configure your Horizontal Pod Autoscaler to use the custom metric, see the custom metrics section of the [HPA walkthrough](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics). + +The Prometheus Adapter can serve three different [metrics APIs](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#support-for-metrics-apis): + +### Custom Metrics + +Enabling this option will cause custom metrics to be served at `/apis/custom.metrics.k8s.io/v1beta1`. Enabled by default when `rules.default` is true, but can be customized by populating `rules.custom`: + +```yaml +rules: + custom: + - seriesQuery: '{__name__=~"^some_metric_count$"}' + resources: + template: <<.Resource>> + name: + matches: "" + as: "my_custom_metric" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) +``` + +### External Metrics + +Enabling this option will cause external metrics to be served at `/apis/external.metrics.k8s.io/v1beta1`. Can be enabled by populating `rules.external`: + +```yaml +rules: + external: + - seriesQuery: '{__name__=~"^some_metric_count$"}' + resources: + template: <<.Resource>> + name: + matches: "" + as: "my_external_metric" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) +``` + +### Resource Metrics + +Enabling this option will cause resource metrics to be served at `/apis/metrics.k8s.io/v1beta1`. Resource metrics will allow pod CPU and Memory metrics to be used in [Horizontal Pod Autoscalers](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) as well as the `kubectl top` command. 
Can be enabled by populating `rules.resource`: + +```yaml +rules: + resource: + cpu: + containerQuery: | + sum by (<<.GroupBy>>) ( + rate(container_cpu_usage_seconds_total{container!="",<<.LabelMatchers>>}[3m]) + ) + nodeQuery: | + sum by (<<.GroupBy>>) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",<<.LabelMatchers>>}[3m]) + ) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + memory: + containerQuery: | + sum by (<<.GroupBy>>) ( + avg_over_time(container_memory_working_set_bytes{container!="",<<.LabelMatchers>>}[3m]) + ) + nodeQuery: | + sum by (<<.GroupBy>>) ( + avg_over_time(node_memory_MemTotal_bytes{<<.LabelMatchers>>}[3m]) + - + avg_over_time(node_memory_MemAvailable_bytes{<<.LabelMatchers>>}[3m]) + ) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + window: 3m +``` + +**NOTE:** Setting a value for `rules.resource` will also deploy the resource metrics API service, providing the same functionality as [metrics-server](https://github.com/helm/charts/tree/master/stable/metrics-server). As such it is not possible to deploy them both in the same cluster. diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/NOTES.txt b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/NOTES.txt new file mode 100644 index 0000000000..b7b9b99322 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/NOTES.txt @@ -0,0 +1,9 @@ +{{ template "k8s-prometheus-adapter.fullname" . }} has been deployed. 
+In a few minutes you should be able to list metrics using the following command(s): +{{ if .Values.rules.resource }} + kubectl get --raw /apis/metrics.k8s.io/v1beta1 +{{- end }} + kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 +{{ if .Values.rules.external }} + kubectl get --raw /apis/external.metrics.k8s.io/v1beta1 +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/_helpers.tpl new file mode 100644 index 0000000000..edbb829b2b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/_helpers.tpl @@ -0,0 +1,113 @@ +# Rancher +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to linux nodes; +add the linux tolerations below so that workloads can be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "k8s-prometheus-adapter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name.
+*/}} +{{- define "k8s-prometheus-adapter.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "k8s-prometheus-adapter.namespace" -}} +{{- default .Release.Namespace .Values.namespaceOverride -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "k8s-prometheus-adapter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Generate basic labels +*/}} +{{- define "k8s-prometheus-adapter.labels" }} +helm.sh/chart: {{ include "k8s-prometheus-adapter.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ template "k8s-prometheus-adapter.name" . }} +{{- include "k8s-prometheus-adapter.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "k8s-prometheus-adapter.selectorLabels" }} +app.kubernetes.io/name: {{ include "k8s-prometheus-adapter.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "k8s-prometheus-adapter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "k8s-prometheus-adapter.fullname" .) 
.Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Get Policy API Version */}} +{{- define "k8s-prometheus-adapter.pdb.apiVersion" -}} +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.21-0" .Capabilities.KubeVersion.Version) -}} + {{- print "policy/v1" -}} +{{- else -}} + {{- print "policy/v1beta1" -}} +{{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/certmanager.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/certmanager.yaml new file mode 100644 index 0000000000..4e32c964c6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/certmanager.yaml @@ -0,0 +1,76 @@ +{{- if .Values.certManager.enabled -}} +--- +# Create a selfsigned Issuer, in order to create a root CA certificate for +# signing webhook serving certificates +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-self-signed-issuer + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +spec: + selfSigned: {} +--- +# Generate a CA Certificate used to sign certificates for the webhook +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-root-cert + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +spec: + secretName: {{ template "k8s-prometheus-adapter.fullname" . 
}}-root-cert + duration: {{ .Values.certManager.caCertDuration }} + issuerRef: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-self-signed-issuer + commonName: "ca.webhook.prometheus-adapter" + isCA: true +--- +# Create an Issuer that uses the above generated CA certificate to issue certs +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-root-issuer + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +spec: + ca: + secretName: {{ template "k8s-prometheus-adapter.fullname" . }}-root-cert +--- +# Finally, generate a serving certificate for the apiservices to use +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-cert + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +spec: + secretName: {{ template "k8s-prometheus-adapter.fullname" . }} + duration: {{ .Values.certManager.certDuration }} + issuerRef: + name: {{ template "k8s-prometheus-adapter.fullname" . }}-root-issuer + dnsNames: + - {{ template "k8s-prometheus-adapter.fullname" . }} + - {{ template "k8s-prometheus-adapter.fullname" . }}.{{ include "k8s-prometheus-adapter.namespace" . }} + - {{ template "k8s-prometheus-adapter.fullname" . }}.{{ include "k8s-prometheus-adapter.namespace" . 
}}.svc +{{- end -}} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-auth-delegator.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-auth-delegator.yaml new file mode 100644 index 0000000000..6701e6ba08 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-auth-delegator.yaml @@ -0,0 +1,20 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-system-auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-resource-reader.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-resource-reader.yaml new file mode 100644 index 0000000000..67efd2aa2f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-binding-resource-reader.yaml @@ -0,0 +1,20 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . 
| indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-resource-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "k8s-prometheus-adapter.name" . }}-resource-reader +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-resource-reader.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-resource-reader.yaml new file mode 100644 index 0000000000..2c690a03cc --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/cluster-role-resource-reader.yaml @@ -0,0 +1,24 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-resource-reader +rules: +- apiGroups: + - "" + resources: + - namespaces + - pods + - services + - configmaps + verbs: + - get + - list + - watch +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/configmap.yaml new file mode 100644 index 0000000000..17f415d970 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/configmap.yaml @@ -0,0 +1,97 @@ +{{- if not .Values.rules.existing -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . 
}} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +data: + config.yaml: | +{{- if or .Values.rules.default .Values.rules.custom }} + rules: +{{- if .Values.rules.default }} + - seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}' + seriesFilters: [] + resources: + overrides: + namespace: + resource: namespace + pod: + resource: pod + name: + matches: ^container_(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[5m])) + by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}' + seriesFilters: + - isNot: ^container_.*_seconds_total$ + resources: + overrides: + namespace: + resource: namespace + pod: + resource: pod + name: + matches: ^container_(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container!="POD"}[5m])) + by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container!="POD",namespace!="",pod!=""}' + seriesFilters: + - isNot: ^container_.*_total$ + resources: + overrides: + namespace: + resource: namespace + pod: + resource: pod + name: + matches: ^container_(.*)$ + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container!="POD"}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_total$ + resources: + template: <<.Resource>> + name: + matches: "" + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_seconds_total + resources: + template: <<.Resource>> + name: + matches: ^(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: [] + resources: + 
template: <<.Resource>> + name: + matches: ^(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) +{{- end -}} +{{- if .Values.rules.custom }} +{{ toYaml .Values.rules.custom | indent 4 }} +{{- end -}} +{{- end -}} +{{- if .Values.rules.external }} + externalRules: +{{ toYaml .Values.rules.external | indent 4 }} +{{- end -}} +{{- if .Values.rules.resource }} + resourceRules: +{{ toYaml .Values.rules.resource | indent 6 }} +{{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-apiservice.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-apiservice.yaml new file mode 100644 index 0000000000..8b7b4e555e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-apiservice.yaml @@ -0,0 +1,34 @@ +{{- if or .Values.rules.default .Values.rules.custom }} +{{- if .Capabilities.APIVersions.Has "apiregistration.k8s.io/v1" }} +apiVersion: apiregistration.k8s.io/v1 +{{- else }} +apiVersion: apiregistration.k8s.io/v1beta1 +{{- end }} +kind: APIService +metadata: +{{- if or .Values.certManager.enabled .Values.customAnnotations }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + {{- if .Values.customAnnotations }} + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} +{{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: v1beta1.custom.metrics.k8s.io +spec: + service: + name: {{ template "k8s-prometheus-adapter.fullname" . 
}} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} + {{- if .Values.tls.enable }} + caBundle: {{ b64enc .Values.tls.ca }} + {{- end }} + group: custom.metrics.k8s.io + version: v1beta1 + {{- if not (or .Values.tls.enable .Values.certManager.enabled) }} + insecureSkipTLSVerify: true + {{- end }} + groupPriorityMinimum: 100 + versionPriority: 100 +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role-binding-hpa.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role-binding-hpa.yaml new file mode 100644 index 0000000000..0cc6920836 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role-binding-hpa.yaml @@ -0,0 +1,24 @@ +{{- /* +This if must be aligned with custom-metrics-cluster-role.yaml +as otherwise this binding will point to not existing role. +*/ -}} +{{- if and .Values.rbac.create (or .Values.rules.default .Values.rules.custom) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-hpa-controller +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "k8s-prometheus-adapter.name" . }}-server-resources +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . 
| quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role.yaml new file mode 100644 index 0000000000..4aa15ffe99 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/custom-metrics-cluster-role.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rbac.create (or .Values.rules.default .Values.rules.custom) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-server-resources +rules: +- apiGroups: + - custom.metrics.k8s.io + resources: ["*"] + verbs: ["*"] +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/deployment.yaml new file mode 100644 index 0000000000..a7ea3310a0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/deployment.yaml @@ -0,0 +1,143 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + {{- if or .Values.customAnnotations .Values.deploymentAnnotations }} + annotations: + {{- with .Values.customAnnotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.deploymentAnnotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . 
}} +spec: + replicas: {{ .Values.replicas }} + strategy: {{ toYaml .Values.strategy | nindent 4 }} + selector: + matchLabels: + {{- include "k8s-prometheus-adapter.selectorLabels" . | indent 6 }} + template: + metadata: + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | trim | nindent 8 }} + {{- end }} + name: {{ template "k8s-prometheus-adapter.name" . }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.customAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + {{- if .Values.hostNetwork.enabled }} + hostNetwork: true + {{- end }} + {{- if .Values.dnsPolicy }} + dnsPolicy: {{ .Values.dnsPolicy }} + {{- end}} + containers: + - name: {{ .Chart.Name }} + image: "{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 8 }} + {{- end }} + args: + - /adapter + - --secure-port={{ .Values.listenPort }} + {{- if or .Values.tls.enable .Values.certManager.enabled }} + - --tls-cert-file=/var/run/serving-cert/tls.crt + - --tls-private-key-file=/var/run/serving-cert/tls.key + {{- end }} + - --cert-dir=/tmp/cert + - --prometheus-url={{ tpl .Values.prometheus.url . 
}}{{ if .Values.prometheus.port }}:{{ .Values.prometheus.port }}{{end}}{{ .Values.prometheus.path }} + - --metrics-relist-interval={{ .Values.metricsRelistInterval }} + - --v={{ .Values.logLevel }} + - --config=/etc/adapter/config.yaml + {{- if .Values.extraArguments }} + {{- toYaml .Values.extraArguments | trim | nindent 8 }} + {{- end }} + ports: + - containerPort: {{ .Values.listenPort }} + name: https + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- if .Values.resources }} + resources: + {{- toYaml .Values.resources | nindent 10 }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} + volumeMounts: + {{- if .Values.extraVolumeMounts }} + {{ toYaml .Values.extraVolumeMounts | trim | nindent 8 }} + {{ end }} + - mountPath: /etc/adapter/ + name: config + readOnly: true + - mountPath: /tmp + name: tmp + {{- if or .Values.tls.enable .Values.certManager.enabled }} + - mountPath: /var/run/serving-cert + name: volume-serving-cert + readOnly: true + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.nodeSelector }} +{{- toYaml .Values.nodeSelector | nindent 8 }} +{{- end }} + affinity: + {{- toYaml .Values.affinity | nindent 8 }} + topologySpreadConstraints: + {{- toYaml .Values.topologySpreadConstraints | nindent 8 }} + priorityClassName: {{ .Values.priorityClassName }} + {{- if .Values.podSecurityContext }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . 
| nindent 8 }} +{{- if .Values.tolerations }} +{{- toYaml .Values.tolerations | nindent 8 }} +{{- end }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + {{- range .Values.image.pullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + volumes: + {{- if .Values.extraVolumes }} + {{ toYaml .Values.extraVolumes | trim | nindent 6 }} + {{ end }} + - name: config + configMap: + name: {{ .Values.rules.existing | default (include "k8s-prometheus-adapter.fullname" . ) }} + - name: tmp + emptyDir: {} + {{- if or .Values.tls.enable .Values.certManager.enabled }} + - name: volume-serving-cert + secret: + secretName: {{ template "k8s-prometheus-adapter.fullname" . }} + {{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-apiservice.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-apiservice.yaml new file mode 100644 index 0000000000..21339af128 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-apiservice.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rules.external }} +{{- if .Capabilities.APIVersions.Has "apiregistration.k8s.io/v1" }} +apiVersion: apiregistration.k8s.io/v1 +{{- else }} +apiVersion: apiregistration.k8s.io/v1beta1 +{{- end }} +kind: APIService +metadata: +{{- if or .Values.certManager.enabled .Values.customAnnotations }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + {{- if .Values.customAnnotations }} + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} +{{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . 
| indent 4 }} + name: v1beta1.external.metrics.k8s.io +spec: + service: + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} + {{- if .Values.tls.enable }} + caBundle: {{ b64enc .Values.tls.ca }} + {{- end }} + group: external.metrics.k8s.io + version: v1beta1 + {{- if not (or .Values.tls.enable .Values.certManager.enabled) }} + insecureSkipTLSVerify: true + {{- end }} + groupPriorityMinimum: 100 + versionPriority: 100 +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role-binding-hpa.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role-binding-hpa.yaml new file mode 100644 index 0000000000..05547bd323 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role-binding-hpa.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.rbac.create .Values.rules.external -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-hpa-controller-external-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "k8s-prometheus-adapter.name" . 
}}-external-metrics +subjects: +- kind: ServiceAccount + name: horizontal-pod-autoscaler + namespace: kube-system +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role.yaml new file mode 100644 index 0000000000..212ea78b25 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/external-metrics-cluster-role.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.rbac.create .Values.rules.external -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-external-metrics +rules: +- apiGroups: + - "external.metrics.k8s.io" + resources: + - "*" + verbs: + - list + - get + - watch +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/pdb.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/pdb.yaml new file mode 100644 index 0000000000..205761a9f1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/pdb.yaml @@ -0,0 +1,23 @@ +{{- if .Values.podDisruptionBudget.enabled }} +apiVersion: {{ include "k8s-prometheus-adapter.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . 
| indent 4 }} +spec: + {{- if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "k8s-prometheus-adapter.selectorLabels" . | indent 6 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/psp.yaml new file mode 100644 index 0000000000..fded5a7491 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/psp.yaml @@ -0,0 +1,66 @@ +{{- if and (or .Values.global.cattle.psp.enabled .Values.psp.create) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "k8s-prometheus-adapter.fullname" . }} + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} +spec: + {{- if .Values.hostNetwork.enabled }} + hostNetwork: true + hostPorts: + - min: {{ .Values.listenPort }} + max: {{ .Values.listenPort }} + {{- end }} + fsGroup: + rule: RunAsAny + runAsGroup: + rule: RunAsAny + runAsUser: + rule: MustRunAs + ranges: + - min: 1024 + max: 65535 + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + volumes: + - secret + - emptyDir + - configMap +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . 
}}-psp +rules: +- apiGroups: + - 'policy' + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "k8s-prometheus-adapter.fullname" . }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-psp +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "k8s-prometheus-adapter.name" . }}-psp +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-apiservice.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-apiservice.yaml new file mode 100644 index 0000000000..0cc9fff6a2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-apiservice.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rules.resource}} +{{- if .Capabilities.APIVersions.Has "apiregistration.k8s.io/v1" }} +apiVersion: apiregistration.k8s.io/v1 +{{- else }} +apiVersion: apiregistration.k8s.io/v1beta1 +{{- end }} +kind: APIService +metadata: +{{- if or .Values.certManager.enabled .Values.customAnnotations }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-root-cert" (include "k8s-prometheus-adapter.namespace" .) (include "k8s-prometheus-adapter.fullname" .) | quote }} + {{- if .Values.customAnnotations }} + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} +{{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . 
| indent 4 }} + name: v1beta1.metrics.k8s.io +spec: + service: + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} + {{- if .Values.tls.enable }} + caBundle: {{ b64enc .Values.tls.ca }} + {{- end }} + group: metrics.k8s.io + version: v1beta1 + {{- if not (or .Values.tls.enable .Values.certManager.enabled) }} + insecureSkipTLSVerify: true + {{- end }} + groupPriorityMinimum: 100 + versionPriority: 100 +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role-binding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role-binding.yaml new file mode 100644 index 0000000000..3c247e48d2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role-binding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.rbac.create .Values.rules.resource -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-hpa-controller-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "k8s-prometheus-adapter.name" . }}-metrics +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . 
| quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role.yaml new file mode 100644 index 0000000000..73d8953046 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/resource-metrics-cluster-role.yaml @@ -0,0 +1,23 @@ +{{- if and .Values.rbac.create .Values.rules.resource -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . }}-metrics +rules: +- apiGroups: + - "" + resources: + - pods + - nodes + - nodes/stats + verbs: + - get + - list + - watch +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/role-binding-auth-reader.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/role-binding-auth-reader.yaml new file mode 100644 index 0000000000..d3c77c1c65 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/role-binding-auth-reader.yaml @@ -0,0 +1,21 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.name" . 
}}-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . | quote }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/secret.yaml new file mode 100644 index 0000000000..3e7e8887bd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/secret.yaml @@ -0,0 +1,17 @@ +{{- if .Values.tls.enable -}} +apiVersion: v1 +kind: Secret +metadata: + {{- if .Values.customAnnotations }} + annotations: + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . 
}} +type: kubernetes.io/tls +data: + tls.crt: {{ b64enc .Values.tls.certificate }} + tls.key: {{ b64enc .Values.tls.key }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/service.yaml new file mode 100644 index 0000000000..ddac37cfa1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/service.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + {{- if or .Values.service.annotations .Values.customAnnotations }} + annotations: + {{- if .Values.service.annotations }} + {{ toYaml .Values.service.annotations | indent 4 }} + {{- end }} + {{- if .Values.customAnnotations }} + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} + {{- end }} + labels: + {{- include "k8s-prometheus-adapter.labels" . | indent 4 }} + name: {{ template "k8s-prometheus-adapter.fullname" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} +spec: + ports: + - port: {{ .Values.service.port }} + protocol: TCP + targetPort: https + selector: + {{- include "k8s-prometheus-adapter.selectorLabels" . | indent 4 }} + type: {{ .Values.service.type }} + {{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} + {{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/serviceaccount.yaml new file mode 100644 index 0000000000..30a169ae0e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/templates/serviceaccount.yaml @@ -0,0 +1,18 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "k8s-prometheus-adapter.labels" . 
| indent 4 }} + name: {{ template "k8s-prometheus-adapter.serviceAccountName" . }} + namespace: {{ include "k8s-prometheus-adapter.namespace" . }} +{{- if or .Values.serviceAccount.annotations .Values.customAnnotations }} + annotations: + {{- if .Values.serviceAccount.annotations }} + {{- toYaml .Values.serviceAccount.annotations | nindent 4 }} + {{- end }} + {{- if .Values.customAnnotations }} + {{- toYaml .Values.customAnnotations | nindent 4 }} + {{- end }} +{{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/values.yaml new file mode 100644 index 0000000000..a1445a23f1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-adapter/values.yaml @@ -0,0 +1,277 @@ +# Default values for k8s-prometheus-adapter. +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + +affinity: {} + +topologySpreadConstraints: [] + +image: + repository: rancher/mirrored-prometheus-adapter-prometheus-adapter + tag: v0.12.0 + pullPolicy: IfNotPresent + +logLevel: 4 + +metricsRelistInterval: 1m + +listenPort: 6443 + +nodeSelector: {} + +priorityClassName: "" + +## Override the release namespace (for multi-namespace deployments in combined charts) +namespaceOverride: "" + +## Additional annotations to add to all resources +customAnnotations: {} + # role: custom-metrics + +## Additional labels to add to all resources +customLabels: {} + # monitoring: prometheus-adapter + +# URL to access Prometheus +prometheus: + # Value is templated + url: http://prometheus.default.svc + port: 9090 + path: "" + +replicas: 1 + +# k8s 1.21 needs fsGroup to be set for non root deployments +# ref: https://github.com/kubernetes/kubernetes/issues/70679 +podSecurityContext: + fsGroup: 10001 + +# SecurityContext of the container +# ref. 
https://kubernetes.io/docs/tasks/configure-pod-container/security-context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["all"] + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 10001 + seccompProfile: + type: RuntimeDefault + +rbac: + # Specifies whether RBAC resources should be created + create: true + +psp: + # Specifies whether PSP resources should be created + create: false + +serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: + # ServiceAccount annotations. + # Use case: AWS EKS IAM roles for service accounts + # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html + annotations: {} + +# Custom DNS configuration to be added to prometheus-adapter pods +dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + +resources: {} + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 100m + # memory: 128Mi + +# Configure liveness probe +# https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Probe +livenessProbe: + httpGet: + path: /healthz + port: https + scheme: HTTPS + initialDelaySeconds: 30 + timeoutSeconds: 5 + +# Configure readiness probe +readinessProbe: + httpGet: + path: /healthz + port: https + scheme: HTTPS + initialDelaySeconds: 30 + timeoutSeconds: 5 + +rules: + default: true + + custom: [] + # - seriesQuery: '{__name__=~"^some_metric_count$"}' + # resources: + # template: <<.Resource>> + # name: + # matches: "" + # as: "my_custom_metric" + # metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + + # Mounts a configMap with pre-generated rules for use. 
Overrides the + # default, custom, external and resource entries + existing: + + external: [] + # - seriesQuery: '{__name__=~"^some_metric_count$"}' + # resources: + # template: <<.Resource>> + # name: + # matches: "" + # as: "my_external_metric" + # metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + + # resource: + # cpu: + # containerQuery: | + # sum by (<<.GroupBy>>) ( + # rate(container_cpu_usage_seconds_total{container!="",<<.LabelMatchers>>}[3m]) + # ) + # nodeQuery: | + # sum by (<<.GroupBy>>) ( + # rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",<<.LabelMatchers>>}[3m]) + # ) + # resources: + # overrides: + # node: + # resource: node + # namespace: + # resource: namespace + # pod: + # resource: pod + # containerLabel: container + # memory: + # containerQuery: | + # sum by (<<.GroupBy>>) ( + # avg_over_time(container_memory_working_set_bytes{container!="",<<.LabelMatchers>>}[3m]) + # ) + # nodeQuery: | + # sum by (<<.GroupBy>>) ( + # avg_over_time(node_memory_MemTotal_bytes{<<.LabelMatchers>>}[3m]) + # - + # avg_over_time(node_memory_MemAvailable_bytes{<<.LabelMatchers>>}[3m]) + # ) + # resources: + # overrides: + # node: + # resource: node + # namespace: + # resource: namespace + # pod: + # resource: pod + # containerLabel: container + # window: 3m + +service: + annotations: {} + port: 443 + type: ClusterIP + # clusterIP: 1.2.3.4 + +tls: + enable: false + ca: |- + # Public CA file that signed the APIService + key: |- + # Private key of the APIService + certificate: |- + # Public key of the APIService + +# Set environment variables from secrets, configmaps or by setting them as name/value +env: [] + # - name: TMP_DIR + # value: /tmp + # - name: PASSWORD + # valueFrom: + # secretKeyRef: + # name: mysecret + # key: password + # optional: false + +# Any extra arguments +extraArguments: [] + # - --tls-private-key-file=/etc/tls/tls.key + # - --tls-cert-file=/etc/tls/tls.crt + +# Any extra volumes +extraVolumes: [] + # - 
name: example-name + # hostPath: + # path: /path/on/host + # type: DirectoryOrCreate + # - name: ssl-certs + # hostPath: + # path: /etc/ssl/certs/ca-bundle.crt + # type: File + +# Any extra volume mounts +extraVolumeMounts: [] + # - name: example-name + # mountPath: /path/in/container + # - name: ssl-certs + # mountPath: /etc/ssl/certs/ca-certificates.crt + # readOnly: true + +tolerations: [] + +# Labels added to the pod +podLabels: {} + +# Annotations added to the pod +podAnnotations: {} + +# Annotations added to the deployment +deploymentAnnotations: {} + +hostNetwork: + # Specifies if prometheus-adapter should be started in hostNetwork mode. + # + # You need to enable this if you use alternate overlay networking for pods and the + # API server is unable to communicate with metrics-server. As an example, this is required + # if you use Weave network on EKS. See also dnsPolicy + enabled: false + +# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet +# dnsPolicy: ClusterFirstWithHostNet + +# Deployment strategy type +strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 25% + maxSurge: 25% + +podDisruptionBudget: + # Specifies if PodDisruptionBudget should be enabled + # When enabled, minAvailable or maxUnavailable should also be defined. + enabled: false + minAvailable: + maxUnavailable: 1 + +certManager: + enabled: false + caCertDuration: 43800h + certDuration: 8760h diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/.helmignore new file mode 100644 index 0000000000..f0c1319444 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/Chart.yaml new file mode 100644 index 0000000000..9130cbcc91 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/Chart.yaml @@ -0,0 +1,25 @@ +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: |- + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts +apiVersion: v2 +appVersion: 1.7.0 +description: A Helm chart for prometheus node-exporter +home: https://github.com/prometheus/node_exporter/ +keywords: +- node-exporter +- prometheus +- exporter +maintainers: +- email: gianrubio@gmail.com + name: gianrubio +- email: zanhsieh@gmail.com + name: zanhsieh +- email: rootsandtrees@posteo.de + name: zeritti +name: prometheus-node-exporter +sources: +- https://github.com/prometheus/node_exporter/ +type: application +version: 4.30.3 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/README.md new file mode 100644 index 0000000000..149b982267 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/README.md @@ -0,0 +1,97 @@ + +# Prometheus Node Exporter + +Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors. + +This chart bootstraps a Prometheus [Node Exporter](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. 
+ +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter +``` + +_See [configuration](#configuring) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/prometheus-node-exporter --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### 3.x to 4.x + +Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade. + +```console +kubectl delete daemonset -l app=prometheus-node-exporter +helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter +``` + +If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please make sure to update their `selector` fields to match the new labels. 
+ +### From 2.x to 3.x + +Change the following: + +```yaml +hostRootFsMount: true +``` + +to: + +```yaml +hostRootFsMount: + enabled: true + mountPropagation: HostToContainer +``` + +## Configuring + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-node-exporter +``` + +### kube-rbac-proxy + +You can enable `prometheus-node-exporter` endpoint protection using `kube-rbac-proxy`. By setting `kubeRBACProxy.enabled: true`, this chart will deploy an RBAC proxy container protecting the node-exporter endpoint. +To authorize access, authenticate your requests (via a `ServiceAccount` for example) with a `ClusterRole` attached such as: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-node-exporter-read +rules: + - apiGroups: [ "" ] + resources: ["services/node-exporter-prometheus-node-exporter"] + verbs: + - get +``` + +See [kube-rbac-proxy examples](https://github.com/brancz/kube-rbac-proxy/tree/master/examples/resource-attributes) for more details. diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/NOTES.txt b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/NOTES.txt new file mode 100644 index 0000000000..8c5391f1f7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/NOTES.txt @@ -0,0 +1,29 @@ +1. Get the application URL by running these commands: +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ template "prometheus-node-exporter.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus-node-exporter.fullname" . 
}}) + export NODE_IP=$(kubectl get nodes --namespace {{ template "prometheus-node-exporter.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ template "prometheus-node-exporter.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app.kubernetes.io/name={{ template "prometheus-node-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:{{ .Values.service.port }} to use your application" + kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME {{ .Values.service.port }} +{{- end }} + +{{- if .Values.kubeRBACProxy.enabled}} + +kube-rbac-proxy endpoint protections is enabled: +- Metrics endpoints is now HTTPS +- Ensure that the client authenticates the requests (e.g. via service account) with the following role permissions: +``` +rules: + - apiGroups: [ "" ] + resources: ["services/{{ template "prometheus-node-exporter.fullname" . 
}}"] + verbs: + - get +``` +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/_helpers.tpl new file mode 100644 index 0000000000..72a6db45a1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/_helpers.tpl @@ -0,0 +1,236 @@ +# Rancher +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +A Windows cluster adds a default taint to Linux nodes; +add the Linux tolerations below so that workloads can be scheduled onto those Linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-node-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "prometheus-node-exporter.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-node-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prometheus-node-exporter.labels" -}} +helm.sh/chart: {{ include "prometheus-node-exporter.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ include "prometheus-node-exporter.name" . }} +{{ include "prometheus-node-exporter.selectorLabels" . }} +{{- with .Chart.AppVersion }} +app.kubernetes.io/version: {{ . | quote }} +{{- end }} +{{- with .Values.podLabels }} +{{ toYaml . }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-node-exporter.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-node-exporter.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-node-exporter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prometheus-node-exporter.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +The image to use +*/}} +{{- define "prometheus-node-exporter.image" -}} +{{- $temp_registry := (include "system_default_registry" .) 
}} +{{- if .Values.image.sha }} +{{- fail "image.sha forbidden. Use image.digest instead" }} +{{- else if .Values.image.digest }} +{{- if $temp_registry }} +{{- printf "%s%s:%s@%s" $temp_registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- else if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s@%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- end }} +{{- else }} +{{- if $temp_registry }} +{{- printf "%s%s:%s" $temp_registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- else if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "prometheus-node-exporter.namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Create the namespace name of the service monitor +*/}} +{{- define "prometheus-node-exporter.monitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.monitor.namespace }} +{{- .Values.prometheus.monitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define 
"servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end }} + +{{/* +Formats imagePullSecrets. Input is (dict "Values" .Values "imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "prometheus-node-exporter.imagePullSecrets" -}} +{{- range (concat .Values.global.imagePullSecrets .imagePullSecrets) }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Create the namespace name of the pod monitor +*/}} +{{- define "prometheus-node-exporter.podmonitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.podMonitor.namespace }} +{{- .Values.prometheus.podMonitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for podmonitor */}} +{{- define "podmonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . 
}} +{{- end }} +{{- end }} + +{{/* Sets sidecar volumeMounts */}} +{{- define "prometheus-node-exporter.sidecarVolumeMounts" -}} +{{- range $_, $mount := $.Values.sidecarVolumeMount }} +- name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} +{{- end }} +{{- range $_, $mount := $.Values.sidecarHostVolumeMounts }} +- name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} +{{- if $mount.mountPropagation }} + mountPropagation: {{ $mount.mountPropagation }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrole.yaml new file mode 100644 index 0000000000..c256dba73d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrole.yaml @@ -0,0 +1,19 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.kubeRBACProxy.enabled true) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . 
| nindent 4 }} +rules: + {{- if $.Values.kubeRBACProxy.enabled }} + - apiGroups: [ "authentication.k8s.io" ] + resources: + - tokenreviews + verbs: [ "create" ] + - apiGroups: [ "authorization.k8s.io" ] + resources: + - subjectaccessreviews + verbs: [ "create" ] + {{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml new file mode 100644 index 0000000000..653305ad9e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.kubeRBACProxy.enabled true) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + name: {{ template "prometheus-node-exporter.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole +{{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} +{{- else }} + name: {{ template "prometheus-node-exporter.fullname" . }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "prometheus-node-exporter.serviceAccountName" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/daemonset.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/daemonset.yaml new file mode 100644 index 0000000000..48d274f1b1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/daemonset.yaml @@ -0,0 +1,309 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "prometheus-node-exporter.fullname" . 
}} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.daemonsetAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + {{- with .Values.updateStrategy }} + updateStrategy: + {{- toYaml . | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 8 }} + spec: + automountServiceAccountToken: {{ ternary true false (or .Values.serviceAccount.automountServiceAccountToken .Values.kubeRBACProxy.enabled) }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + {{- with .Values.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "prometheus-node-exporter.serviceAccountName" . }} + {{- with .Values.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ . }} + {{- end }} + containers: + {{- $servicePort := ternary .Values.kubeRBACProxy.port .Values.service.port .Values.kubeRBACProxy.enabled }} + - name: node-exporter + image: {{ include "prometheus-node-exporter.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + {{- if .Values.hostRootFsMount.enabled }} + - --path.rootfs=/host/root + {{- if semverCompare ">=1.4.0-0" (coalesce .Values.version .Values.image.tag .Chart.AppVersion) }} + - --path.udev.data=/host/root/run/udev/data + {{- end }} + {{- end }} + - --web.listen-address=[$(HOST_IP)]:{{ $servicePort }} + {{- with .Values.extraArgs }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: HOST_IP + {{- if .Values.kubeRBACProxy.enabled }} + value: 127.0.0.1 + {{- else if .Values.service.listenOnAllInterfaces }} + value: 0.0.0.0 + {{- else }} + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + {{- end }} + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- if eq .Values.kubeRBACProxy.enabled false }} + ports: + - name: {{ .Values.service.portName }} + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- end }} + livenessProbe: + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + httpGet: + {{- if .Values.kubeRBACProxy.enabled }} + host: 127.0.0.1 + {{- end }} + httpHeaders: + {{- range $_, $header := .Values.livenessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ $servicePort }} + scheme: {{ upper .Values.livenessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + httpGet: + {{- if .Values.kubeRBACProxy.enabled }} + host: 127.0.0.1 + {{- end }} + httpHeaders: 
+ {{- range $_, $header := .Values.readinessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ $servicePort }} + scheme: {{ upper .Values.readinessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.terminationMessageParams.enabled }} + {{- with .Values.terminationMessageParams }} + terminationMessagePath: {{ .terminationMessagePath }} + terminationMessagePolicy: {{ .terminationMessagePolicy }} + {{- end }} + {{- end }} + volumeMounts: + - name: proc + mountPath: /host/proc + {{- with .Values.hostProcFsMount.mountPropagation }} + mountPropagation: {{ . }} + {{- end }} + readOnly: true + - name: sys + mountPath: /host/sys + {{- with .Values.hostSysFsMount.mountPropagation }} + mountPropagation: {{ . }} + {{- end }} + readOnly: true + {{- if .Values.hostRootFsMount.enabled }} + - name: root + mountPath: /host/root + {{- with .Values.hostRootFsMount.mountPropagation }} + mountPropagation: {{ . }} + {{- end }} + readOnly: true + {{- end }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- with $mount.mountPropagation }} + mountPropagation: {{ . 
}} + {{- end }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: true + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + {{- range .Values.sidecars }} + {{- $overwrites := dict "volumeMounts" (concat (include "prometheus-node-exporter.sidecarVolumeMounts" $ | fromYamlArray) (.volumeMounts | default list) | default list) }} + {{- $defaults := dict "image" (include "prometheus-node-exporter.image" $) "securityContext" $.Values.containerSecurityContext "imagePullPolicy" $.Values.image.pullPolicy }} + - {{- toYaml (merge $overwrites . $defaults) | nindent 10 }} + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- .Values.kubeRBACProxy.extraArgs | toYaml | nindent 12 }} + {{- end }} + - --secure-listen-address=:{{ .Values.service.port}} + - --upstream=http://127.0.0.1:{{ $servicePort }}/ + - --proxy-endpoints-port=8888 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + {{- $base_registry := (include "monitoring_registry" .) 
}} + {{- if .Values.kubeRBACProxy.image.sha }} + image: "{{ $base_registry | default .Values.kubeRBACProxy.image.registry}}/{{ .Values.kubeRBACProxy.image.repository }}:{{ .Values.kubeRBACProxy.image.tag }}@sha256:{{ .Values.kubeRBACProxy.image.sha }}" + {{- else }} + image: "{{ $base_registry | default .Values.kubeRBACProxy.image.registry}}/{{ .Values.kubeRBACProxy.image.repository }}:{{ .Values.kubeRBACProxy.image.tag }}" + {{- end }} + ports: + - containerPort: {{ .Values.service.port}} + name: {{ .Values.kubeRBACProxy.portName }} + {{- if .Values.kubeRBACProxy.enableHostPort }} + hostPort: {{ .Values.service.port }} + {{- end }} + - containerPort: 8888 + name: "http-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8888 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: + {{- toYaml .Values.kubeRBACProxy.resources | nindent 12 }} + {{- end }} + {{- if .Values.terminationMessageParams.enabled }} + {{- with .Values.terminationMessageParams }} + terminationMessagePath: {{ .terminationMessagePath }} + terminationMessagePolicy: {{ .terminationMessagePolicy }} + {{- end }} + {{- end }} + {{- with .Values.kubeRBACProxy.env }} + env: + {{- range $key, $value := $.Values.kubeRBACProxy.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- end }} + {{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: + {{ toYaml .Values.kubeRBACProxy.containerSecurityContext | nindent 12 }} + {{- end }} + {{- end }} + {{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "prometheus-node-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }} + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} + hostPID: {{ .Values.hostPID }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.restartPolicy }} + restartPolicy: {{ . }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + {{- if .Values.hostRootFsMount.enabled }} + - name: root + hostPath: + path: / + {{- end }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- with $mount.type }} + type: {{ . }} + {{- end }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + emptyDir: + medium: Memory + {{- end }} + {{- range $_, $mount := .Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + configMap: + name: {{ $mount.name }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + secret: + secretName: {{ $mount.name }} + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy-config + configMap: + name: {{ template "prometheus-node-exporter.fullname" . 
}}-rbac-config + {{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/endpoints.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/endpoints.yaml new file mode 100644 index 0000000000..56b695203a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/endpoints.yaml @@ -0,0 +1,18 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.service.portName }} + port: {{ .Values.service.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/extra-manifests.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/extra-manifests.yaml new file mode 100644 index 0000000000..2b21b71062 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl . 
$ }} +{{ end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/networkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/networkpolicy.yaml new file mode 100644 index 0000000000..825722729d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/networkpolicy.yaml @@ -0,0 +1,23 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingress: + - ports: + - port: {{ .Values.service.port }} + policyTypes: + - Egress + - Ingress + podSelector: + matchLabels: + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/podmonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/podmonitor.yaml new file mode 100644 index 0000000000..f88da6a34e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/podmonitor.yaml @@ -0,0 +1,91 @@ +{{- if .Values.prometheus.podMonitor.enabled }} +apiVersion: {{ .Values.prometheus.podMonitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: PodMonitor +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.podmonitor-namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.prometheus.podMonitor.additionalLabels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.podMonitor.jobLabel }} + {{- include "podmonitor.scrapeLimits" .Values.prometheus.podMonitor | nindent 2 }} + selector: + matchLabels: + {{- with .Values.prometheus.podMonitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + {{- end }} + namespaceSelector: + matchNames: + - {{ include "prometheus-node-exporter.namespace" . }} + {{- with .Values.prometheus.podMonitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + podMetricsEndpoints: + - port: {{ .Values.service.portName }} + {{- with .Values.prometheus.podMonitor.scheme }} + scheme: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.path }} + path: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.authorization }} + authorization: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.oauth2 }} + oauth2: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- /* "with" skips a boolean false, so honorTimestamps: false was silently dropped; emit any explicit boolean. */ -}} {{- if or .Values.prometheus.podMonitor.honorTimestamps (kindIs "bool" .Values.prometheus.podMonitor.honorTimestamps) }} + honorTimestamps: {{ .Values.prometheus.podMonitor.honorTimestamps }} + {{- end }} + {{- with .Values.prometheus.podMonitor.honorLabels }} + honorLabels: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ . 
}} + {{- end }} + {{- with .Values.prometheus.podMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + enableHttp2: {{ default false .Values.prometheus.podMonitor.enableHttp2 }} + filterRunning: {{ ternary .Values.prometheus.podMonitor.filterRunning (default true .Values.prometheus.podMonitor.filterRunning) (kindIs "bool" .Values.prometheus.podMonitor.filterRunning) }}{{/* "default" treats false as unset; pass explicit booleans through unchanged, keep the true fallback otherwise. */}} + followRedirects: {{ default false .Values.prometheus.podMonitor.followRedirects }} + {{- with .Values.prometheus.podMonitor.params }} + params: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml new file mode 100644 index 0000000000..ee5bbba4a5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml @@ -0,0 +1,14 @@ +{{- if and (or .Values.global.cattle.psp.enable (and .Values.rbac.create .Values.rbac.pspEnabled)) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: psp-{{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +rules: +- apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ include "prometheus-node-exporter.fullname" . 
}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml new file mode 100644 index 0000000000..160f2bbf7a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and (or .Values.global.cattle.psp.enable (and .Values.rbac.create .Values.rbac.pspEnabled)) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: psp-{{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psp-{{ include "prometheus-node-exporter.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp.yaml new file mode 100644 index 0000000000..f3b52e1120 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/psp.yaml @@ -0,0 +1,49 @@ +{{- if and (or .Values.global.cattle.psp.enable (and .Values.rbac.create .Values.rbac.pspEnabled)) (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . 
| nindent 4 }} + {{- with .Values.rbac.pspAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + - 'hostPath' + hostNetwork: true + hostIPC: false + hostPID: true + hostPorts: + - min: 0 + max: 65535 + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/rbac-configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/rbac-configmap.yaml new file mode 100644 index 0000000000..814e110337 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/rbac-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.kubeRBACProxy.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "prometheus-node-exporter.fullname" . }}-rbac-config + namespace: {{ include "prometheus-node-exporter.namespace" . }} +data: + config-file.yaml: |+ + authorization: + resourceAttributes: + namespace: {{ template "prometheus-node-exporter.namespace" . }} + apiVersion: v1 + resource: services + subresource: {{ template "prometheus-node-exporter.fullname" . }} + name: {{ template "prometheus-node-exporter.fullname" . 
}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/service.yaml new file mode 100644 index 0000000000..a065e46e39 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/service.yaml @@ -0,0 +1,29 @@ +{{- if .Values.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: +{{- if .Values.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.service.ipDualStack.ipFamilyPolicy }} +{{- end }} + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + {{- if ( and (eq .Values.service.type "NodePort" ) (not (empty .Values.service.nodePort)) ) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: {{ .Values.service.portName }} + selector: + {{- include "prometheus-node-exporter.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/serviceaccount.yaml new file mode 100644 index 0000000000..5c3348c09b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rbac.create .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "prometheus-node-exporter.serviceAccountName" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- if or .Values.serviceAccount.imagePullSecrets .Values.global.imagePullSecrets }} +imagePullSecrets: + {{- include "prometheus-node-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.serviceAccount.imagePullSecrets) | indent 2 }} +{{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/servicemonitor.yaml new file mode 100644 index 0000000000..6d6e440473 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/servicemonitor.yaml @@ -0,0 +1,71 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: {{ .Values.prometheus.monitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: ServiceMonitor +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.monitor-namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . 
| nindent 4 }} + {{- with .Values.prometheus.monitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | nindent 2 }} + {{- with .Values.prometheus.monitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- with .Values.prometheus.monitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + {{- end }} + {{- with .Values.prometheus.monitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + endpoints: + - port: {{ .Values.service.portName }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- with .Values.prometheus.monitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + metricRelabelings: + {{- with .Values.prometheus.monitor.metricRelabelings }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.global.cattle.clusterId }}{{/* quoted so an ID that looks numeric or boolean stays a string */}} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{- if .Values.global.cattle.clusterName }}{{/* quoted so a name that looks numeric or boolean stays a string */}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000000..2c2705f872 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml @@ -0,0 +1,40 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: node-exporter + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ . }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml . | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: DaemonSet + name: {{ include "prometheus-node-exporter.fullname" . 
}} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/values.yaml new file mode 100644 index 0000000000..b9f2f7ab87 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/prometheus-node-exporter/values.yaml @@ -0,0 +1,530 @@ +# Default values for prometheus-node-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + registry: docker.io + repository: rancher/mirrored-prometheus-node-exporter + # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} + tag: v1.7.0 + pullPolicy: IfNotPresent + digest: "" + +imagePullSecrets: [] +# - name: "image-pull-secret" +nameOverride: "" +fullnameOverride: "" + +# Number of old history to retain to allow rollback +# Default Kubernetes value is set to 10 +revisionHistoryLimit: 10 + +global: + cattle: + psp: + enable: true + systemDefaultRegistry: "" + + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + # + # Allow parent charts to override registry hostname + imageRegistry: "docker.io" + +# Configure kube-rbac-proxy. When enabled, creates a kube-rbac-proxy to protect the node-exporter http endpoint. +# The requests are served through the same service but requests are HTTPS. 
+kubeRBACProxy: + enabled: false + ## Set environment variables as name/value pairs + env: {} + # VARIABLE: value + image: + registry: docker.io + repository: rancher/mirrored-kube-rbac-proxy + tag: v0.15.0 + sha: "" + pullPolicy: IfNotPresent + + # List of additional cli arguments to configure kube-rbac-proxy + # for example: --tls-cipher-suites, --log-file, etc. + # all the possible args can be found here: https://github.com/brancz/kube-rbac-proxy#usage + extraArgs: [] + + ## Specify security settings for a Container + ## Allows overrides and additional options compared to (Pod) securityContext + ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + containerSecurityContext: {} + + # Specify the port used for the Node exporter container (upstream port) + port: 8100 + # Specify the name of the container port + portName: http + # Configure a hostPort. If true, hostPort will be enabled in the container and set to service.port. + enableHostPort: false + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + +service: + enabled: true + type: ClusterIP + port: 9796 + targetPort: 9796 + nodePort: + portName: metrics + listenOnAllInterfaces: true + annotations: + prometheus.io/scrape: "true" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + +# Set a NetworkPolicy with: +# ingress only on service.port +# no egress permitted +networkPolicy: + enabled: false + +# Additional environment variables that will be passed to the daemonset +env: {} +## env: +## VARIABLE: value + +prometheus: + monitor: + enabled: false + additionalLabels: {} + namespace: "" + + jobLabel: "" + + # List of pod labels to add to node exporter metrics + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor + podTargetLabels: [] + + scheme: http + basicAuth: {} + bearerTokenFile: + tlsConfig: {} + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## Override serviceMonitor selector + ## + selectorOverride: {} + + ## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + ## + attachMetadata: + node: false + + relabelings: [] + metricRelabelings: [] + interval: "" + scrapeTimeout: 10s + ## prometheus.monitor.apiVersion ApiVersion for the serviceMonitor Resource(defaults to "monitoring.coreos.com/v1") + apiVersion: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + # PodMonitor defines monitoring for a set of pods. + # ref. https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor + # Using a PodMonitor may be preferred in some environments where there is very large number + # of Node Exporter endpoints (1000+) behind a single service. + # The PodMonitor is disabled by default. When switching from ServiceMonitor to PodMonitor, + # the time series resulting from the configuration through PodMonitor may have different labels. + # For instance, there will not be the service label any longer which might + # affect PromQL queries selecting that label. + podMonitor: + enabled: false + # Namespace in which to deploy the pod monitor. Defaults to the release namespace. + namespace: "" + # Additional labels, e.g. setting a label for pod monitor selector as set in prometheus + additionalLabels: {} + # release: kube-prometheus-stack + # PodTargetLabels transfers labels of the Kubernetes Pod onto the target. + podTargetLabels: [] + # apiVersion defaults to monitoring.coreos.com/v1. + apiVersion: "" + # Override pod selector to select pod objects. + selectorOverride: {} + # Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + attachMetadata: + node: false + # The label to use to retrieve the job name from. Defaults to label app.kubernetes.io/name. + jobLabel: "" + + # Scheme/protocol to use for scraping. + scheme: "http" + # Path to scrape metrics at. + path: "/metrics" + + # BasicAuth allow an endpoint to authenticate over basic authentication. + # More info: https://prometheus.io/docs/operating/configuration/#endpoint + basicAuth: {} + # Secret to mount to read bearer token for scraping targets. 
+ # The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator. + # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#secretkeyselector-v1-core + bearerTokenSecret: {} + # TLS configuration to use when scraping the endpoint. + tlsConfig: {} + # Authorization section for this endpoint. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.SafeAuthorization + authorization: {} + # OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.OAuth2 + oauth2: {} + + # ProxyURL eg http://proxyserver:2195. Directs scrapes through proxy to this endpoint. + proxyUrl: "" + # Interval at which endpoints should be scraped. If not specified Prometheus’ global scrape interval is used. + interval: "" + # Timeout after which the scrape is ended. If not specified, the Prometheus global scrape timeout is used. + scrapeTimeout: "" + # HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data. + honorTimestamps: true + # HonorLabels chooses the metric’s labels on collisions with target labels. + honorLabels: true + # Whether to enable HTTP2. Default false. + enableHttp2: "" + # Drop pods that are not running. (Failed, Succeeded). + # Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + filterRunning: "" + # FollowRedirects configures whether scrape requests follow HTTP 3xx redirects. Default false. + followRedirects: "" + # Optional HTTP URL parameters + params: {} + + # RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds + # relabelings for a few standard Kubernetes fields. The original scrape job’s name + # is available via the __tmp_prometheus_job_name label. 
+ # More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + relabelings: [] + # MetricRelabelConfigs to apply to samples before ingestion. + metricRelabelings: [] + + # SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + sampleLimit: 0 + # TargetLimit defines a limit on the number of scraped targets that will be accepted. + targetLimit: 0 + # Per-scrape limit on number of labels that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelLimit: 0 + # Per-scrape limit on length of labels name that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelNameLengthLimit: 0 + # Per-scrape limit on length of labels value that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelValueLengthLimit: 0 + +## Customize the updateStrategy if set +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 200m + # memory: 50Mi + # requests: + # cpu: 100m + # memory: 30Mi + +# Specify the container restart policy passed to the Node Export container +# Possible Values: Always (default)|OnFailure|Never +restartPolicy: null + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + imagePullSecrets: [] + automountServiceAccountToken: false + +securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + +containerSecurityContext: + readOnlyRootFilesystem: true + # capabilities: + # add: + # - SYS_TIME + +rbac: + ## If true, create & use RBAC resources + ## + create: true + pspAnnotations: {} + +# for deployments that have node_exporter deployed outside of the cluster, list +# their addresses here +endpoints: [] + +# Expose the service to the host network +hostNetwork: true + +# Share the host process ID namespace +hostPID: true + +# Mount the node's root file system (/) at /host/root in the container +hostRootFsMount: + enabled: true + # Defines how new mounts in existing mounts on the node or in the container + # are propagated to the container or node, respectively. Possible values are + # None, HostToContainer, and Bidirectional. If this field is omitted, then + # None is used. 
More information on: + # https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation + mountPropagation: HostToContainer + +# Mount the node's proc file system (/proc) at /host/proc in the container +hostProcFsMount: + # Possible values are None, HostToContainer, and Bidirectional + mountPropagation: "" + +# Mount the node's sys file system (/sys) at /host/sys in the container +hostSysFsMount: + # Possible values are None, HostToContainer, and Bidirectional + mountPropagation: "" + +## Assign a group of affinity scheduling rules +## +affinity: {} +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchFields: +# - key: metadata.name +# operator: In +# values: +# - target-host-name + +# Annotations to be added to node exporter pods +podAnnotations: + # Fix for very slow GKE cluster upgrades + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + +# Extra labels to be added to node exporter pods +podLabels: {} + +# Annotations to be added to node exporter daemonset +daemonsetAnnotations: {} + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +# Custom DNS configuration to be added to prometheus-node-exporter pods +dnsConfig: {} +# nameservers: +# - 1.2.3.4 +# searches: +# - ns1.svc.cluster-domain.example +# - my.dns.search.suffix +# options: +# - name: ndots +# value: "2" +# - name: edns0 + +## Assign a nodeSelector if operating a hybrid cluster +## +nodeSelector: + kubernetes.io/os: linux + # kubernetes.io/arch: amd64 + +# Specify grace period for graceful termination of pods. 
Defaults to 30 if null or not specified +terminationGracePeriodSeconds: null + +tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + +# Enable or disable container termination message settings +# https://kubernetes.io/docs/tasks/debug/debug-application/determine-reason-pod-failure/ +terminationMessageParams: + enabled: false + # If enabled, specify the path for termination messages + terminationMessagePath: /dev/termination-log + # If enabled, specify the policy for termination messages + terminationMessagePolicy: File + + +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +## Additional container arguments +## +extraArgs: [] +# - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$ +# - --collector.textfile.directory=/run/prometheus + +## Additional mounts from the host to node-exporter container +## +extraHostVolumeMounts: [] +# - name: +# hostPath: +# https://kubernetes.io/docs/concepts/storage/volumes/#hostpath-volume-types +# type: "" (Default)|DirectoryOrCreate|Directory|FileOrCreate|File|Socket|CharDevice|BlockDevice +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional configmaps to be mounted. 
+## +configmaps: [] +# - name: +# mountPath: +secrets: [] +# - name: +# mountPath: +## Override the deployment namespace +## +namespaceOverride: "" + +## Additional containers for export metrics to text file; fields image,imagePullPolicy,securityContext take default value from main container +## +sidecars: [] +# - name: nvidia-dcgm-exporter +# image: nvidia/dcgm-exporter:1.4.3 +# volumeMounts: +# - name: tmp +# mountPath: /tmp + +## Volume for sidecar containers +## +sidecarVolumeMount: [] +# - name: collector-textfiles +# mountPath: /run/prometheus +# readOnly: false + +## Additional mounts from the host to sidecar containers +## +sidecarHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +## Liveness probe +## +livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +## Readiness probe +## +readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +# Enable vertical pod autoscaler support for prometheus-node-exporter +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. 
+ # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto + +# Extra manifests to deploy as an array +extraManifests: [] + # - | + # apiVersion: v1 + # kind: ConfigMap + # metadata: + # name: prometheus-extra + # data: + # extra-data: "value" + +# Override version of app, required if image.tag is defined and does not follow semver +version: "" diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/Chart.yaml new file mode 100644 index 0000000000..dbf4d0b815 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. +kubeVersion: '>=1.26.0-0' +name: rke2ControllerManager +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. 
A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. + +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. 
+ +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. `http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy.
Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out during template rendering if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .Release.Namespace }}.svc:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components.
The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL 
security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has, it should match the number of pod the hostNetwork Deployment has. 
Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; +the Linux tolerations below allow workloads to be scheduled onto those Linux nodes. +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set .
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* Add credentials to the map created on the previous line; calling set on the endpoint's "authorization" key again would replace that map and drop "type". */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" .
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2ControllerManager/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/Chart.yaml new file mode 100644 index 0000000000..87495a6a6b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rke2Etcd +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g.
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run.
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to linux nodes; +add the linux tolerations below so that workloads can be scheduled onto those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}}{{- /* Returns .Values.clients.proxyUrl when set; otherwise builds the in-cluster URL of the proxy Service from .Values.proxy.port. Rendering fails if neither value is set. */ -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}}{{- /* Renders .Values.serviceMonitor.endpoints as YAML after injecting proxyUrl, cluster relabelings, scheme, TLS and authentication settings into each endpoint. */ -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret is a secret key selector (name + key), not a bare secret name; service-account-token Secrets store the token under "token". */ -}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Create the dict with "type" first, then add "credentials" to that same dict so "type" is not overwritten. */ -}}{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Etcd/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it empty + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/Chart.yaml new file mode 100644 index 0000000000..4bce6d41ea --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rke2IngressNginx +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources has been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}}{{- /* Emits .Values.serviceMonitor.endpoints as YAML after decorating each endpoint in place: proxyUrl (when the proxy is enabled), cluster_id / cluster_name metric relabelings (when set in global.cattle), https scheme and _scheme param, tlsConfig.insecureSkipVerify, and whichever authentication methods are enabled. */ -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) 
-}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret must be a secret key selector (name + key), not a bare secret name; the service-account-token Secret stores the token under "token". */ -}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build type and credentials in one dict: a second set on "authorization" would replace the first and drop "type". */ -}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . 
-}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2IngressNginx/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/Chart.yaml new file mode 100644 index 0000000000..0a316e05c7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rke2Proxy +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile`, and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build type and credentials in a single dict: a second set on "authorization" would replace the first and drop "type". */}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + # Expansions below are quoted so an empty search result or a path containing whitespace stays a single word + test -z "${CERT_FILE_SOURCE}" && echo "Failed to find cert file" && exit 1 + test -z "${KEY_FILE_SOURCE}" && echo "Failed to find key file" && exit 1 + test -z "${CACERT_FILE_SOURCE}" && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp "$CERT_FILE_SOURCE" "$CERT_FILE_TARGET" || exit 1 + chmod 444 "$CERT_FILE_TARGET" || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp "$KEY_FILE_SOURCE" "$KEY_FILE_TARGET" || exit 1 + chmod 444 "$KEY_FILE_TARGET" || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp "$CACERT_FILE_SOURCE" "$CACERT_FILE_TARGET" || exit 1 + chmod 444 "$CACERT_FILE_TARGET" || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: 
/etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Proxy/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # proxyUrl (when proxy.enabled), params._scheme (when clients.https.enabled) and tlsConfig.insecureSkipVerify are overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will fail to render if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it empty + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # If set to true, the ServiceMonitor endpoint scheme is set to https 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/Chart.yaml new file mode 100644 index 0000000000..fa6aa5ac66 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rke2Scheduler +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://HOST_IP:metricsPort/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart into | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will fail to render if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if `clients.https.enabled` and `clients.https.useServiceAccountCredentials` are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.deployment.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile`, and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; the tolerations below +allow these workloads to still be scheduled onto those Linux nodes. +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build type and credentials in one dict: a second set on the authorization key would replace the first and drop type. */ -}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rke2Scheduler/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/Chart.yaml new file mode 100644 index 0000000000..df00a46b66 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rkeControllerManager +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart into | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape the `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape the `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape the `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape the `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if `clients.https.enabled` and `clients.https.useServiceAccountCredentials` are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to linux nodes; +the linux tolerations below allow workloads to be scheduled onto those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}}{{- /* Resolves the proxy URL handed to clients: .Values.clients.proxyUrl when set, otherwise a URL built from the proxy name, namespace and .Values.proxy.port. */ -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or 
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- $_ := set . "authorization" (dict "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeControllerManager/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/Chart.yaml new file mode 100644 index 0000000000..96b33f0bc8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rkeEtcd +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, all previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to `false`, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g.
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart into | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out when rendering templates if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if `clients.https.enabled` and `clients.https.useServiceAccountCredentials` are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run.
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile`, and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.client.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- /* bearerTokenSecret is a secret key selector (name + key), not a bare secret name; "token" is the key a service-account-token Secret carries. */}}{{- $_ := set . "bearerTokenSecret" (dict "name" $serviceAccountTokenName "key" "token") }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- /* Build the authorization dict in a single call: a second set on the same key would replace the dict and drop "type". */}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional)) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeEtcd/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it empty + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/Chart.yaml new file mode 100644 index 0000000000..bc49bcefd2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rkeIngressNginx +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources has been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` | +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g.
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out when rendering templates if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .
Release.Namespace }}.svc:{{ .Values.proxy.port }}` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components.
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run.
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of overrides, each pairing a Semver `constraint` string (defined by https://github.com/Masterminds/semver) with `values` (values.yaml overrides). For each entry in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches the entry's semver constraint. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows clusters add a default taint to Linux nodes; +add the Linux tolerations below so workloads can be scheduled onto those Linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or
.Values.clients.proxyUrl .Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set .
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }}{{- /* Create the map with "type" first, then add "credentials" to that same map; calling set on "authorization" twice would replace the map and drop "type". */ -}} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" .
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeIngressNginx/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https?
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/Chart.yaml new file mode 100644 index 0000000000..18eac324d4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rkeProxy +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. +​ +> **Note:** +> In this chart release, any previous field that was associated with any PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` | + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ .
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `""` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has, it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run.
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` | + +*Tip: The filepaths set in `clients.https.<cert|key|caCert>File` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used.
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- /* pushProxy.proxyUrl: the URL clients register with; .Values.clients.proxyUrl when set, otherwise the in-cluster proxy Service URL built from .Values.proxy.port. Rendering fails if neither value is set. */ -}}{{- define "pushProxy.proxyUrl" -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set .
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- /* Add credentials to the authorization dict created on the previous line; calling set on the endpoint again would replace that dict and drop its type key. */ -}}{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" .
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z ${CERT_FILE_SOURCE} && echo "Failed to find cert file" && exit 1 + test -z ${KEY_FILE_SOURCE} && echo "Failed to find key file" && exit 1 + test -z ${CACERT_FILE_SOURCE} && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp $CERT_FILE_SOURCE $CERT_FILE_TARGET || exit 1 + chmod 444 $CERT_FILE_TARGET || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp $KEY_FILE_SOURCE $KEY_FILE_TARGET || exit 1 + chmod 444 $KEY_FILE_TARGET || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp $CACERT_FILE_SOURCE $CACERT_FILE_TARGET || exit 1 + chmod 444 $CACERT_FILE_TARGET || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile }} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeProxy/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out on rendering templating if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/Chart.yaml new file mode 100644 index 0000000000..8c53b63bcc --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/Chart.yaml @@ -0,0 +1,15 @@ +annotations: + catalog.cattle.io/hidden: "true" + catalog.cattle.io/kube-version: '>= 1.26.0-0 < 1.31.0-0' + catalog.cattle.io/os: linux + catalog.rancher.io/certified: rancher + catalog.rancher.io/namespace: cattle-monitoring-system + catalog.rancher.io/release-name: rancher-pushprox +apiVersion: v1 +appVersion: 0.1.0 +description: Sets up a deployment of the PushProx proxy and a DaemonSet of PushProx + clients. 
+kubeVersion: '>=1.26.0-0' +name: rkeScheduler +type: application +version: 0.2.0 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/README.md b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/README.md new file mode 100644 index 0000000000..345002f48a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/README.md @@ -0,0 +1,90 @@ +# rancher-pushprox + +A Rancher chart based on Rancher [PushProx](https://github.com/rancher/PushProx) that sets up a Deployment of a PushProx proxy and a DaemonSet of PushProx clients on a Kubernetes cluster. + +Installs [rancher-pushprox](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-pushprox) to create PushProx clients that can access their host's network and register with a PushProx proxy. A [Prometheus Operator](https://github.com/coreos/prometheus-operator) ServiceMonitor CR is also included that is configured to scrape the metrics from each of the clients through the proxy. + +Using an instance of this chart is suitable for the following scenarios: +- You need to scrape metrics from a port that should not be accessible outside of the host (e.g. scraping `etcd` metrics in a hardened cluster) +- You need to scrape metrics on a host that are not exposed outside of 127.0.0.1 (e.g. scraping `kube-proxy` metrics) +- You need to scrape metrics through HTTPS using certs hosted directly on `hostPath` +- You need to scrape metrics from Kubernetes components that require authorization via a service account (e.g. permissions to make request to `/metrics`) +- You need to scrape metrics without access to cacerts (i.e. enable `insecureSkipVerify`) + +The clients and proxy are created based on a Rancher fork of the [prometheus-community/PushProx](https://github.com/prometheus-community/PushProx) project. 
+ +## Upgrading to Kubernetes v1.25+ + +Starting in Kubernetes v1.25, [Pod Security Policies](https://kubernetes.io/docs/concepts/security/pod-security-policy/) have been removed from the Kubernetes API. + +As a result, **before upgrading to Kubernetes v1.25** (or on a fresh install in a Kubernetes v1.25+ cluster), users are expected to perform an in-place upgrade of this chart with `global.cattle.psp.enabled` set to `false` if it has been previously set to `true`. + +> **Note:** +> In this chart release, any previous fields that were associated with PSP resources have been removed in favor of a single global field: `global.cattle.psp.enabled`. + +> **Note:** +> If you upgrade your cluster to Kubernetes v1.25+ before removing PSPs via a `helm upgrade` (even if you manually clean up resources), **it will leave the Helm release in a broken state within the cluster such that further Helm operations will not work (`helm uninstall`, `helm upgrade`, etc.).** +> +> If your charts get stuck in this state, please consult the Rancher docs on how to clean up your Helm release secrets. + +Upon setting `global.cattle.psp.enabled` to false, the chart will remove any PSP resources deployed on its behalf from the cluster. This is the default setting for this chart. + +As a replacement for PSPs, [Pod Security Admission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) should be used. Please consult the Rancher docs for more details on how to configure your chart release namespaces to work with the new Pod Security Admission and apply Pod Security Standards. + +## Configuration + +The following tables list the configurable parameters of the rancher-pushprox chart and their default values. + +### General + +#### Required +| Parameter | Description | Example | +| ----- | ----------- | ------ | +| `component` | The component that is being monitored | `kube-etcd` +| `metricsPort` | The port on the host that contains the metrics you want to scrape (e.g. 
`http://<HOST_IP>:<metricsPort>/metrics`) | `2379` | +| `namespaceOverride` | The namespace to install the chart | `""` + +#### Optional +| Parameter | Description | Default | +| ----- | ----------- | ------ | +| `serviceMonitor.enabled` | Deploys a [Prometheus Operator](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) ServiceMonitor CR that is configured to scrape metrics on the hosts that the clients are deployed on via the proxy. Also deploys a Service that points to all pods with the expected client name that exposes the `metricsPort` selected | `true` | +| `serviceMonitor.endpoints` | A list of endpoints that will be added to the ServiceMonitor based on the [Endpoint spec](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint) | `[{port: metrics}]` | +| `service.selector` | The selector that is used to populate the Service's Endpoints object. The chart will error out on rendering templating if `.Values.clients.enabled` is set alongside this field, since it is expected that this service should point to the PushProx Clients Daemonset / Deployment | `{}` | +| `clients.enabled` | Deploys a DaemonSet of clients that are each capable of scraping endpoints on the hostNetwork it is deployed on | `true` | +| `clients.port` | The port where the client will publish PushProx client-specific metrics. If deploying multiple clients onto the same node, the clients should not have conflicting ports | `9369` | +| `clients.proxyUrl` | Overrides the default proxyUrl setting of `http://pushprox-{{ .Values.component }}-proxy.{{ . 
Release.Namespace }}.svc.cluster.local:{{ .Values.proxy.port }}"` with the `proxyUrl` specified | `""` | +| `clients.useLocalhost` | Sets a flag on each client deployment to redirect scrapes directed to `HOST_IP` to `127.0.0.1` | `false` | +| `clients.https.enabled` | Enables scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.forceHTTPSScheme` | Forces scraping metrics via HTTPS using the provided TLS certs that exist on each host | `false` | +| `clients.https.useServiceAccountCredentials` | If set to true, the client will create a service account with permissions to scrape `/metrics` endpoint of Kubernetes components. The client will use the service account token provided to make authorized scrape requests to the Kubernetes API | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.enabled` | If set to true, the client will use service account credentials mounted at the configured path `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath`. This requires permissions to scrape `/metrics` endpoint of Kubernetes components. This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath` | This is a volume mount on the pod with permissions to scrape `/metrics` endpoint of Kubernetes components | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | +| `clients.https.authenticationMethod.bearerTokenSecret.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components. 
This method is deprecated by the prometheus operator and may be removed in a future release | `false` | +| `clients.https.authenticationMethod.authorization.enabled` | If set to true, the client will use service account credentials to scrape `/metrics` endpoint of Kubernetes components | `false` | +| `clients.https.authenticationMethod.authorization.type` | If set, the client will use this type of authorization in its client requests for metrics | `"bearer"` | +| `clients.https.authenticationMethod.authorization.credentials.key` | If set, the client will use this key in the secret created by `clients.https.useServiceAccountCredentials` for authorization in its client requests for metrics | `"token"` | +| `clients.https.authenticationMethod.authorization.credentials.optional` | If set to false, the client will fail if the key in the secret created by `clients.https.useServiceAccountCredentials` does not exist | `false` | +| `clients.https.insecureSkipVerify` | If set to true, the client will disable SSL security checks | `false` | +| `clients.https.certDir` | A `hostPath` where TLS certs can be found. This path is mounted as a volume on an `initContainer` which copies only the necessary files over to an EmptyDir volume used by each client. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.certFile` | The path to the TLS cert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.keyFile` | The path to the TLS key file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.caCertFile` | The path to the TLS cacert file located within `clients.https.certDir`. Required and only used if `clients.https.enabled` is set | `""` | +| `clients.https.seLinuxOptions` | seLinuxOptions to be passed into the container that copies certs. 
Should define a container with permissions to read the files in the certDir provided on the host. Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. | `{}` | +| `clients.metrics.enabled` | Whether the client should publish PushProx client-specific metrics. | `false` | +| `clients.rbac.additionalRules` | Additional permissions to provide to the ServiceAccount bound to the client. This can be used to provide additional permissions for the client to scrape metrics from the k8s API. Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true | `[]` | +| `clients.deployment.enabled` | Deploys the client as a Deployment (generally used if the underlying hostNetwork Pod that is being scraped is managed by a Deployment) | `false` | +| `clients.deployment.replicas` | The number of pods the Deployment has; it should match the number of pods the hostNetwork Deployment has. Required and only used if `clients.deployment.enabled` is set | `0` | +| `clients.affinity` | The affinity rules that allocate the pod to the node in which the hostNetwork Deployment's pods run. 
Required and only used if `clients.deployment.enabled` is set | `{}` | +| `clients.resources` | Set resource limits and requests for the client container | `{}` | +| `clients.nodeSelector` | Select which nodes to deploy the clients on | `{}` | +| `clients.tolerations` | Specify tolerations for clients | `[]` | +| `proxy.enabled` | Deploys the proxy that each client will register with | `true` | +| `proxy.port` | The port exposed by the proxy that each client will register with to allow metrics to be scraped from the host | `8080` | +| `proxy.resources` | Set resource limits and requests for the proxy container | `{}` | +| `proxy.nodeSelector` | Select which nodes the proxy can be deployed on | `{}` | +| `proxy.tolerations` | Specify tolerations (if necessary) to allow the proxy to be deployed on the selected node | `[]` | +| `kubeVersionOverrides` | A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches any of the semver constraints provided as keys on the map. On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. | `[]` + +*Tip: The filepaths set in `clients.https.certFile`, `clients.https.keyFile` and `clients.https.caCertFile` can include wildcard characters*. + +See [rancher-monitoring](https://github.com/rancher/charts/tree/gh-pages/packages/rancher-monitoring) for examples of how this chart can be used. 
diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/_helpers.tpl new file mode 100644 index 0000000000..1ba5093944 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/_helpers.tpl @@ -0,0 +1,170 @@ +# Rancher + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +# Windows Support + +{{/* +Windows cluster will add default taint for linux nodes, +add below linux tolerations to workloads could be scheduled to those linux nodes +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# General + +{{- define "applyKubeVersionOverrides" -}}{{- /* Merges the values of every kubeVersionOverrides entry whose constraint matches the cluster version into .Values, in list order. */ -}} +{{- $overrides := dict -}} +{{- range $override := .Values.kubeVersionOverrides -}} +{{- if semverCompare $override.constraint $.Capabilities.KubeVersion.Version -}} +{{- $_ := mergeOverwrite $overrides $override.values -}} +{{- end -}} +{{- end -}} +{{- $_ := mergeOverwrite .Values $overrides -}} +{{- end -}} + +{{- define "pushprox.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{- define "pushProxy.commonLabels" -}} +release: {{ .Release.Name }} +component: {{ .Values.component | quote }} +provider: kubernetes +{{- end -}} + +{{- define "pushProxy.proxyUrl" -}}{{- /* Returns clients.proxyUrl when it is set, otherwise the in-cluster URL of the proxy Service; rendering fails when neither clients.proxyUrl nor proxy.port is set. */ -}} +{{- $_ := (required "Template requires either .Values.proxy.port or .Values.clients.proxyUrl to set proxyUrl for client" (or .Values.clients.proxyUrl 
.Values.proxy.port)) -}} +{{- if .Values.clients.proxyUrl -}} +{{ printf "%s" .Values.clients.proxyUrl }} +{{- else -}} +{{ printf "http://%s.%s.svc:%d" (include "pushProxy.proxy.name" .) (include "pushprox.namespace" .) (int .Values.proxy.port) }} +{{- end -}}{{- end -}} + +# Client + +{{- define "pushProxy.client.name" -}} +{{- printf "pushprox-%s-client" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.serviceAccountTokenName" -}} +{{- printf "pushprox-%s-client-service-account-token" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.client.labels" -}} +k8s-app: {{ template "pushProxy.client.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# Proxy + +{{- define "pushProxy.proxy.name" -}} +{{- printf "pushprox-%s-proxy" (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.proxy.labels" -}} +k8s-app: {{ template "pushProxy.proxy.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +# ServiceMonitor + +{{- define "pushprox.serviceMonitor.name" -}} +{{- printf "%s-%s" .Release.Name (required ".Values.component is required" .Values.component) -}} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.labels" -}} +app: {{ template "pushprox.serviceMonitor.name" . }} +{{ template "pushProxy.commonLabels" . }} +{{- end -}} + +{{- define "pushProxy.serviceMonitor.endpoints" -}}{{- /* Renders serviceMonitor.endpoints as YAML after adding proxyUrl, cluster relabelings, scheme, tlsConfig and the enabled authentication settings to each endpoint. */ -}} +{{- $proxyURL := (include "pushProxy.proxyUrl" .) -}} +{{- $useHTTPS := .Values.clients.https.enabled -}} +{{- $setHTTPSScheme := .Values.clients.https.forceHTTPSScheme -}} +{{- $insecureSkipVerify := .Values.clients.https.insecureSkipVerify -}} +{{- $useServiceAccountCredentials := .Values.clients.https.useServiceAccountCredentials -}} +{{- $serviceAccountTokenName := (include "pushProxy.client.serviceAccountTokenName" . 
) -}} +{{- $metricRelabelings := list }} +{{- $endpoints := .Values.serviceMonitor.endpoints }} +{{- if .Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- range $endpoints }} +{{- if $.Values.proxy.enabled }} +{{- $_ := set . "proxyUrl" $proxyURL }} +{{- end }} +{{- $clusterIdRelabel := dict }} +{{- $metricRelabelings := list }} +{{- if $.Values.global.cattle.clusterId }} +{{- $_ := set $clusterIdRelabel "action" "replace" }} +{{- $_ := set $clusterIdRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterIdRelabel "targetLabel" "cluster_id" }} +{{- $_ := set $clusterIdRelabel "replacement" $.Values.global.cattle.clusterId }} +{{- $metricRelabelings = append $metricRelabelings $clusterIdRelabel }} +{{- end }} +{{- $clusterNameRelabel := dict }} +{{- if $.Values.global.cattle.clusterName }} +{{- $_ := set $clusterNameRelabel "action" "replace" }} +{{- $_ := set $clusterNameRelabel "sourceLabels" (list "__address__") }} +{{- $_ := set $clusterNameRelabel "targetLabel" "cluster_name" }} +{{- $_ := set $clusterNameRelabel "replacement" $.Values.global.cattle.clusterName }} +{{- $metricRelabelings = append $metricRelabelings $clusterNameRelabel }} +{{- end }} +{{- if not (empty $metricRelabelings) }} +{{- $_ := set . "metricRelabelings" ($metricRelabelings)}} +{{- end }} +{{- if $setHTTPSScheme -}} +{{- $_ := set . "scheme" "https" }} +{{- end -}} +{{- if $useHTTPS -}} +{{- if (hasKey . "params") }} +{{- $_ := set (get . "params") "_scheme" (list "https") }} +{{- else }} +{{- $_ := set . "params" (dict "_scheme" (list "https")) }} +{{- end }} +{{- end }} +{{- if (hasKey . "tlsConfig") }} +{{- $_ := set (get . "tlsConfig") "insecureSkipVerify" $insecureSkipVerify }} +{{- else }} +{{- $_ := set . "tlsConfig" (dict "insecureSkipVerify" $insecureSkipVerify) }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenFile.enabled }} +{{- $_ := set . 
"bearerTokenFile" $.Values.clients.https.authenticationMethod.bearerTokenFile.bearerTokenFilePath }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.bearerTokenSecret.enabled }} +{{- $_ := set . "bearerTokenSecret" $serviceAccountTokenName }} +{{- end }} +{{- if $.Values.clients.https.authenticationMethod.authorization.enabled }} +{{- if (hasKey . "authorization") }} +{{- $_ := set (get . "authorization") "type" $.Values.clients.https.authenticationMethod.authorization.type }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }} +{{- else }} +{{- $_ := set . "authorization" (dict "type" $.Values.clients.https.authenticationMethod.authorization.type) }} +{{- $_ := set (get . "authorization") "credentials" (dict "name" $serviceAccountTokenName "key" $.Values.clients.https.authenticationMethod.authorization.credentials.key "optional" $.Values.clients.https.authenticationMethod.authorization.credentials.optional) }}{{- /* credentials are added to the dict created on the previous line; a second set on the endpoint itself would replace that dict and drop type */}} +{{- end }} +{{- end }} +{{- end }} +{{- toYaml $endpoints }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients-rbac.yaml new file mode 100644 index 0000000000..a8e27c3735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients-rbac.yaml @@ -0,0 +1,97 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.client.name" . }} +{{- end }} +{{- if and .Values.clients.https.enabled .Values.clients.https.useServiceAccountCredentials }} +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +{{- if .Values.clients.rbac.additionalRules }} +{{ toYaml .Values.clients.rbac.additionalRules }} +{{- end }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.client.name" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.client.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +--- +{{- if .Values.clients.https.useServiceAccountCredentials }} +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: {{ template "pushProxy.client.serviceAccountTokenName" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ template "pushProxy.client.name" . }} +{{- end }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . 
| nindent 4 }} +spec: + privileged: false + hostNetwork: true + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 6 }} +{{- end }} + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 0 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + - 'emptyDir' + - 'hostPath' + allowedHostPaths: + - pathPrefix: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + readOnly: true +{{- end }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients.yaml new file mode 100644 index 0000000000..e8fcfb3883 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-clients.yaml @@ -0,0 +1,157 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.clients }}{{- if .Values.clients.enabled }} +apiVersion: apps/v1 +{{- if .Values.clients.deployment.enabled }} +kind: Deployment +{{- else }} +kind: DaemonSet +{{- end }} +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} + pushprox-exporter: "client" +spec: + {{- if .Values.clients.deployment.enabled }} + replicas: {{ .Values.clients.deployment.replicas }} + {{- end }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . 
| nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.client.labels" . | nindent 8 }} + spec: + {{- if .Values.clients.affinity }} + affinity: {{ toYaml .Values.clients.affinity | nindent 8 }} + {{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- if .Values.clients.nodeSelector }} +{{ toYaml .Values.clients.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.clients.tolerations }} +{{ toYaml .Values.clients.tolerations | indent 8 }} +{{- end }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccountName: {{ template "pushProxy.client.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-client + image: {{ template "system_default_registry" . }}{{ .Values.clients.image.repository }}:{{ .Values.clients.image.tag }} + command: + {{- range .Values.clients.command }} + - {{ . 
| quote }} + {{- end }} + args: + - --fqdn=$(HOST_IP) + - --proxy-url=$(PROXY_URL) + {{- if .Values.clients.metrics.enabled }} + - --metrics-addr=$(PORT) + {{- end }} + - --allow-port={{ required "Need .Values.metricsPort to configure client to be allowed to scrape metrics at port" .Values.metricsPort}} + {{- if .Values.clients.useLocalhost }} + - --use-localhost + {{- end }} + {{- if .Values.clients.https.enabled }} + {{- if .Values.clients.https.insecureSkipVerify }} + - --insecure-skip-verify + {{- end }} + {{- if .Values.clients.https.useServiceAccountCredentials }} + - --token-path=/var/run/secrets/kubernetes.io/serviceaccount/token + {{- end }} + {{- if .Values.clients.https.certDir }} + - --tls.cert=/etc/ssl/push-proxy/push-proxy.pem + - --tls.key=/etc/ssl/push-proxy/push-proxy-key.pem + - --tls.cacert=/etc/ssl/push-proxy/push-proxy-ca-cert.pem + {{- end }} + {{- end }} + env: + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if .Values.clients.metrics.enabled }} + - name: PORT + value: :{{ .Values.clients.port }} + {{- end }} + - name: PROXY_URL + value: {{ template "pushProxy.proxyUrl" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + volumeMounts: + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + {{- end }} + {{- if .Values.clients.resources }} + resources: {{ toYaml .Values.clients.resources | nindent 10 }} + {{- end }} + {{- if and .Values.clients.https.enabled .Values.clients.https.certDir }} + initContainers: + - name: copy-certs + image: {{ template "system_default_registry" . 
}}{{ .Values.clients.copyCertsImage.repository }}:{{ .Values.clients.copyCertsImage.tag }} + command: + - sh + - -c + - | + echo "Searching for files to copy within the source volume" + echo "cert: ${CERT_FILE_NAME}" + echo "key: ${KEY_FILE_NAME}" + echo "cacert: ${CACERT_FILE_NAME}" + + CERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CERT_FILE_NAME}" | sort -r | head -n 1) + KEY_FILE_SOURCE=$(find /etc/source/ -type f -name "${KEY_FILE_NAME}" | sort -r | head -n 1) + CACERT_FILE_SOURCE=$(find /etc/source/ -type f -name "${CACERT_FILE_NAME}" | sort -r | head -n 1) + + test -z "${CERT_FILE_SOURCE}" && echo "Failed to find cert file" && exit 1 # expansions are quoted so paths containing whitespace are handled as one argument + test -z "${KEY_FILE_SOURCE}" && echo "Failed to find key file" && exit 1 + test -z "${CACERT_FILE_SOURCE}" && echo "Failed to find cacert file" && exit 1 + + echo "Copying cert file from $CERT_FILE_SOURCE to $CERT_FILE_TARGET" + cp "$CERT_FILE_SOURCE" "$CERT_FILE_TARGET" || exit 1 + chmod 444 "$CERT_FILE_TARGET" || exit 1 + + echo "Copying key file from $KEY_FILE_SOURCE to $KEY_FILE_TARGET" + cp "$KEY_FILE_SOURCE" "$KEY_FILE_TARGET" || exit 1 + chmod 444 "$KEY_FILE_TARGET" || exit 1 + + echo "Copying cacert file from $CACERT_FILE_SOURCE to $CACERT_FILE_TARGET" + cp "$CACERT_FILE_SOURCE" "$CACERT_FILE_TARGET" || exit 1 + chmod 444 "$CACERT_FILE_TARGET" || exit 1 + env: + - name: CERT_FILE_NAME + value: {{ required "Need a TLS cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.certFile | quote }}{{- /* file names are quoted so patterns that begin with a wildcard remain valid YAML strings */}} + - name: KEY_FILE_NAME + value: {{ required "Need a TLS key file for scraping metrics endpoint over HTTPs" .Values.clients.https.keyFile | quote }} + - name: CACERT_FILE_NAME + value: {{ required "Need a TLS CA cert file for scraping metrics endpoint over HTTPs" .Values.clients.https.caCertFile | quote }} + - name: CERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy.pem + - name: KEY_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-key.pem + - name: CACERT_FILE_TARGET + value: /etc/ssl/push-proxy/push-proxy-ca-cert.pem + securityContext: + 
runAsNonRoot: false +{{- if and .Values.global.seLinux.enabled .Values.clients.https.seLinuxOptions }} + seLinuxOptions: {{ .Values.clients.https.seLinuxOptions | toYaml | nindent 12 }} +{{- end }} + volumeMounts: + - name: metrics-cert-dir-source + mountPath: /etc/source + readOnly: true + - name: metrics-cert-dir + mountPath: /etc/ssl/push-proxy + volumes: + - name: metrics-cert-dir-source + hostPath: + path: {{ required "Need access to volume on host with the SSL cert files to use HTTPs" .Values.clients.https.certDir }} + - name: metrics-cert-dir + emptyDir: {} + {{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy-rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy-rbac.yaml new file mode 100644 index 0000000000..eefe609058 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy-rbac.yaml @@ -0,0 +1,68 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "pushProxy.proxy.name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "pushProxy.proxy.name" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "pushProxy.proxy.name" . }} +subjects: + - kind: ServiceAccount + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . 
}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ include "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy.yaml new file mode 100644 index 0000000000..723bbd6c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-proxy.yaml @@ -0,0 +1,57 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if and .Values.proxy }}{{ if .Values.proxy.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} + pushprox-exporter: "proxy" +spec: + selector: + matchLabels: {{ include "pushProxy.proxy.labels" . | nindent 6 }} + template: + metadata: + labels: {{ include "pushProxy.proxy.labels" . | nindent 8 }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + nodeSelector: {{ include "linux-node-selector" . 
| nindent 8 }} +{{- if .Values.proxy.nodeSelector }} +{{ toYaml .Values.proxy.nodeSelector | indent 8 }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- if .Values.proxy.tolerations }} +{{ toYaml .Values.proxy.tolerations | indent 8 }} +{{- end }} + serviceAccountName: {{ template "pushProxy.proxy.name" . }} + {{- if .Values.global.imagePullSecretName }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecretName }} + {{- end }} + containers: + - name: pushprox-proxy + image: {{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }} + command: + {{- range .Values.proxy.command }} + - {{ . | quote }} + {{- end }} + {{- if .Values.proxy.resources }} + resources: {{ toYaml .Values.proxy.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.proxy.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +spec: + ports: + - name: pp-proxy + port: {{ required "Need .Values.proxy.port to configure proxy" .Values.proxy.port }} + protocol: TCP + targetPort: {{ .Values.proxy.port }} + selector: {{ include "pushProxy.proxy.labels" . | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-servicemonitor.yaml new file mode 100644 index 0000000000..67eb2216b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/pushprox-servicemonitor.yaml @@ -0,0 +1,45 @@ +{{- template "applyKubeVersionOverrides" . -}} +{{- if .Values.serviceMonitor }}{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "pushprox.serviceMonitor.name" . 
}} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.serviceMonitor.labels" . | nindent 4 }} +spec: + endpoints: {{include "pushProxy.serviceMonitor.endpoints" . | nindent 4 }} + jobLabel: component + podTargetLabels: + - component + - pushprox-exporter + namespaceSelector: + matchNames: + - {{ template "pushprox.namespace" . }} + selector: + matchLabels: {{ include "pushProxy.client.labels" . | nindent 6 }} +--- +{{- $selector := "" }} +{{- if not (kindIs "invalid" .Values.service) }} +{{- if not (kindIs "invalid" .Values.service.selector) }} +{{ if .Values.service.selector }} +{{- if .Values.clients.enabled }} +{{- required (printf "Cannot override .Values.service.selector=%s when .Values.clients.enabled=true" (toJson .Values.service.selector)) "" }} +{{- end }} +{{- $selector = (toYaml .Values.service.selector) }} +{{- end }} +{{- end }} +{{- end }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "pushProxy.client.name" . }} + namespace: {{ template "pushprox.namespace" . }} + labels: {{ include "pushProxy.client.labels" . | nindent 4 }} +spec: + ports: + - name: metrics + port: {{ required "Need .Values.metricsPort to configure client to listen to metrics at port" .Values.metricsPort}} + protocol: TCP + targetPort: {{ .Values.metricsPort }} + selector: {{ default (include "pushProxy.client.labels" .) 
$selector | nindent 4 }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..16abc2fa83 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-install-crd.yaml @@ -0,0 +1,14 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install Prometheus Operator CRDs before installing this chart." "" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." 
-}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/values.yaml new file mode 100644 index 0000000000..1e076041b3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/rkeScheduler/values.yaml @@ -0,0 +1,166 @@ +# Default values for rancher-pushprox. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Default image containing both the proxy and the client was generated from the following Dockerfile +# https://github.com/prometheus-community/PushProx/blob/eeadbe766641699129920ccfaaaa30a85c67fe81/Dockerfile#L1-L15 + +# Configuration + +global: + cattle: + psp: + enabled: false + systemDefaultRegistry: "" + seLinux: + enabled: false + +# A list of Semver constraint strings (defined by https://github.com/Masterminds/semver) and values.yaml overrides. +# +# For each key in kubeVersionOverrides, this chart will check to see if the current Kubernetes cluster's version matches +# any of the semver constraints provided as keys on the map. +# +# On seeing a match, the default value for each values.yaml field overridden will be updated with the new value. +# +# If multiple matches are encountered (due to overlapping semver ranges), the matches will be applied in order. +# +# Notes: +# - On running a helm template, Helm generally assumes the kubeVersion is v1.20.0 +# - On running a helm install --dry-run, the correct kubeVersion should be chosen. +kubeVersionOverrides: [] +# - constraint: "< 1.21" +# values: +# metricsPort: 10252 +# clients: +# https: +# enabled: false +# insecureSkipVerify: false +# useServiceAccountCredentials: false + +namespaceOverride: "" + +# The component that is being monitored (i.e. 
etcd) +component: "component" + +# The port containing the metrics that need to be scraped +metricsPort: 2739 + +# Configure ServiceMonitor that monitors metrics from the metricsPort endpoint +serviceMonitor: + enabled: true + # A list of endpoints that will be added to the ServiceMonitor based on the Endpoint spec + # Source: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#endpoint + # By default, proxyUrl and params._scheme will be overridden based on other values + endpoints: + - port: metrics + +# Configure Service that grabs scrape targets +service: + # The selector that is used to populate the Service's Endpoints object. + # The chart will error out when rendering templates if .Values.clients.enabled is set alongside this field, + # since it is expected that this service should point to the PushProx Clients Daemonset / Deployment + selector: {} + +clients: + enabled: true + # The port which the PushProx client will post PushProx metrics to + port: 9369 + # If unset, this will default to the URL for the proxy service: http://pushprox-{{component}}-proxy.{{namespace}}.svc.cluster.local:{{proxy.port}} + # Should be modified if the clients are being deployed outside the cluster where the proxy rests, otherwise leave it null + proxyUrl: "" + # If set to true, the client will forward any requests from the host IP to 127.0.0.1 + # It will only allow proxy requests to the metricsPort specified + useLocalhost: false + # Configuration for accessing metrics via HTTPS + https: + # Does the client require https to access the metrics? + enabled: false + # Does the client require requests be sent to http or https? 
+ forceHTTPSScheme: false + # If set to true, the client will create a service account with adequate permissions and set a flag + # on the client to use the service account token provided by it to make authorized scrape requests + useServiceAccountCredentials: false + # Configuration for authentication to metrics via https endpoint + authenticationMethod: + # Reads token from defined file in container + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenFile: + enabled: false + bearerTokenFilePath: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # Reads token from defined secret in namespace + # This function is deprecated in the prometheus operator api and may be removed in a future version + bearerTokenSecret: + enabled: false + # Reads token from defined secret in namespace + authorization: + enabled: false + type: "bearer" + credentials: + key: "token" + optional: false + # If set to true, the client will disable SSL security checks + insecureSkipVerify: false + # Directory on host where necessary TLS cert and key to scrape metrics can be found + certDir: "" + # Filenames for files located in .Values.clients.https.certDir that correspond to TLS settings + certFile: "" + keyFile: "" + caCertFile: "" + # seLinuxOptions to be passed into the container that copies certs. Should define a container with permissions to read the files in the certDir provided on the host. + # Required and only used if `clients.https.enabled` is set and `clients.https.certDir` is provided. 
+ seLinuxOptions: {} + + metrics: + # Whether the client should publish PushProx client-specific metrics to .Values.clients.port + enabled: false + + rbac: + # Additional permissions to provide to the ServiceAccount bound to the client + # This can be used to provide additional permissions for the client to scrape metrics from the k8s API + # Only enabled if clients.https.enabled and clients.https.useServiceAccountCredentials are true + additionalRules: [] + + # Resource limits + resources: {} + + # Options to select all nodes to deploy client DaemonSet on + nodeSelector: {} + tolerations: [] + affinity: {} + + image: + repository: rancher/pushprox-client + tag: v0.1.3-rancher2-client + command: ["pushprox-client"] + + copyCertsImage: + repository: rancher/mirrored-library-busybox + tag: 1.31.1 + + # The default intention of rancher-pushprox clients is to scrape hostNetwork metrics across all nodes. + # This can be used to scrape internal Kubernetes components or DaemonSets of hostNetwork Pods in + # situations where a cloud provider firewall prevents Pod-To-Host communication but not Pod-To-Pod. + # However, if the underlying hostNetwork Pod that is being scraped is managed by a Deployment, + # this advanced option enables users to deploy the client as a Deployment instead of a DaemonSet. + # If a user deploys this feature and the underlying Deployment's number of replicas changes, the user will + # be responsible for upgrading this chart accordingly to the right number of replicas. 
+ deployment: + enabled: false + replicas: 0 + +proxy: + enabled: true + # The port through which PushProx clients will communicate to the proxy + port: 8080 + + # Resource limits + resources: {} + + # Options to select a node to run a single proxy deployment on + nodeSelector: {} + tolerations: [] + + image: + repository: rancher/pushprox-proxy + tag: v0.1.3-rancher2-proxy + command: ["pushprox-proxy"] diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/.helmignore b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/.helmignore new file mode 100644 index 0000000000..f0c1319444 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/Chart.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/Chart.yaml new file mode 100644 index 0000000000..784bb0ec7e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +appVersion: 0.25.1 +description: A Helm chart for prometheus windows-exporter +home: https://github.com/prometheus-community/windows_exporter/ +keywords: +- windows-exporter +- windows +- prometheus +- exporter +maintainers: +- email: github@jkroepke.de + name: jkroepke +name: windowsExporter +sources: +- https://github.com/prometheus-community/windows_exporter/ +type: application +version: 0.3.1 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/README.md 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/README.md new file mode 100644 index 0000000000..1da1c64e12 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/README.md @@ -0,0 +1,42 @@ +# Prometheus `Windows Exporter` + +Prometheus exporter for hardware and OS metrics exposed by Windows kernels, written in Go with pluggable metric collectors. + +This chart bootstraps a prometheus [`Windows Exporter`](http://github.com/prometheus-community/windows_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-windows-exporter +``` + +_See [configuration](#configuring) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Configuring + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). 
To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-windows-exporter +``` diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/scripts/configure-firewall.ps1 b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/scripts/configure-firewall.ps1 new file mode 100644 index 0000000000..9cbed7112d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/scripts/configure-firewall.ps1 @@ -0,0 +1,31 @@ +$ErrorActionPreference = 'Continue' + +function CheckFirewallRuleError { + # We hit an error. This can happen for a number of reasons, including if the rule already exists + if ($error[0]) { + if (($error[0].Exception.NativeErrorCode) -and ($error[0].Exception.NativeErrorCode.ToString() -eq "AlreadyExists")) { + # Previous versions of monitoring may have already created this Firewall Rule + # Because of this, if the rule already exists there is no need to delete and recreate it. 
+ Write-Host "Detected Existing Firewall Rule, Nothing To Do" + } else { + Write-Host "Error Encountered Setting Up Required Firewall Rule" + $error[0].Exception + exit 1 + } + } +} + +Write-Host "Attempting To Configure Firewall Rules For Ports 9796, 10250" + +# This is the exact same firewall rule that has historically been created by rancher-wins +# https://github.com/rancher/wins/blob/91f670c47f19c6d9fe97d8f66a695d3081ad994f/pkg/apis/process_service_mgmt.go#L149 +New-NetFirewallRule -DisplayName rancher-wins-windows-exporter-TCP-9796 -Name rancher-wins-windows-exporter-TCP-9796 -Action Allow -Protocol TCP -LocalPort 9796 -Enabled True -PolicyStore ActiveStore +CheckFirewallRuleError +Write-Host "Windows Node Exporter Firewall Rule Successfully Created" + +# This rule is required in order to have the Rancher UI display node metrics in the 'Nodes' tab of the cluster explorer +New-NetFirewallRule -DisplayName rancher-wins-windows-exporter-TCP-10250 -Name rancher-wins-windows-exporter-TCP-10250 -Action Allow -Protocol TCP -LocalPort 10250 -Enabled True -PolicyStore ActiveStore +CheckFirewallRuleError +Write-Host "Windows Prometheus Metrics Firewall Rule Successfully Created" + +Write-Host "All Firewall Rules Successfully Configured" diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/_helpers.tpl new file mode 100644 index 0000000000..c9a5d6db8c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/_helpers.tpl @@ -0,0 +1,216 @@ +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +The components in this chart create additional resources that expand the longest created name strings. 
+The longest name that gets created adds an extra 37 characters, so truncation should be 63-37=26. +*/}} +{{- define "prometheus-windows-exporter.fullname" -}} +{{ printf "%s-windows-exporter" .Release.Name }} +{{- end -}} + +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +{{- define "windowsExporter.renamedMetricsRelabeling" -}} +{{- range $original, $new := (include "windowsExporter.renamedMetrics" . | fromJson) -}} +- sourceLabels: [__name__] + regex: {{ $original }} + replacement: '{{ $new }}' + targetLabel: __name__ +{{ end -}} +{{- end -}} + +{{- define "windowsExporter.labels" -}} +k8s-app: {{ template "prometheus-windows-exporter.fullname" . }} +release: {{ .Release.Name }} +component: "windows-exporter" +provider: kubernetes +{{- end -}} + +{{- define "windowsExporter.renamedMetrics" -}} +{{- $renamed := dict -}} +{{/* v0.15.0 */}} +{{- $_ := set $renamed "windows_mssql_transactions_active_total" "windows_mssql_transactions_active" -}} +{{/* v0.16.0 */}} +{{- $_ := set $renamed "windows_adfs_ad_login_connection_failures" "windows_adfs_ad_login_connection_failures_total" -}} +{{- $_ := set $renamed "windows_adfs_certificate_authentications" "windows_adfs_certificate_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_device_authentications" "windows_adfs_device_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_extranet_account_lockouts" "windows_adfs_extranet_account_lockouts_total" -}} +{{- $_ := set $renamed "windows_adfs_federated_authentications" "windows_adfs_federated_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_passport_authentications" "windows_adfs_passport_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_password_change_failed" "windows_adfs_password_change_failed_total" -}} +{{- $_ := set $renamed "windows_adfs_password_change_succeeded" 
"windows_adfs_password_change_succeeded_total" -}} +{{- $_ := set $renamed "windows_adfs_token_requests" "windows_adfs_token_requests_total" -}} +{{- $_ := set $renamed "windows_adfs_windows_integrated_authentications" "windows_adfs_windows_integrated_authentications_total" -}} +{{- $_ := set $renamed "windows_net_packets_outbound_errors" "windows_net_packets_outbound_errors_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_discarded" "windows_net_packets_received_discarded_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_errors" "windows_net_packets_received_errors_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_total" "windows_net_packets_received_total_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_unknown" "windows_net_packets_received_unknown_total" -}} +{{- $_ := set $renamed "windows_dns_memory_used_bytes_total" "windows_dns_memory_used_bytes" -}} +{{- $renamed | toJson -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-windows-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prometheus-windows-exporter.labels" -}} +helm.sh/chart: {{ include "prometheus-windows-exporter.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ include "prometheus-windows-exporter.name" . }} +{{ include "prometheus-windows-exporter.selectorLabels" . }} +{{- with .Chart.AppVersion }} +app.kubernetes.io/version: {{ . | quote }} +{{- end }} +{{- with .Values.podLabels }} +{{ toYaml . }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-windows-exporter.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-windows-exporter.fullname" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-windows-exporter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prometheus-windows-exporter.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +The image to use +*/}} +{{- define "prometheus-windows-exporter.image" -}} +{{- if .Values.image.sha }} +{{- fail "image.sha forbidden. Use image.digest instead" }} +{{- else if .Values.image.digest }} +{{- if .Values.global.cattle.systemDefaultRegistry }} +{{- printf "%s/%s:%s@%s" .Values.global.cattle.systemDefaultRegistry .Values.image.repository (default .Chart.AppVersion .Values.image.tag) .Values.image.digest }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default .Chart.AppVersion .Values.image.tag) .Values.image.digest }} +{{- end }} +{{- else }} +{{- if .Values.global.cattle.systemDefaultRegistry }} +{{- printf "%s/%s:%s" .Values.global.cattle.systemDefaultRegistry .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default .Chart.AppVersion .Values.image.tag) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "prometheus-windows-exporter.namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Create the namespace name of the service monitor +*/}} +{{- define "prometheus-windows-exporter.monitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.monitor.namespace }} +{{- 
.Values.prometheus.monitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end }} + +{{/* +Formats imagePullSecrets. Input is (dict "Values" .Values "imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "prometheus-windows-exporter.imagePullSecrets" -}} +{{- range (concat .Values.global.imagePullSecrets .imagePullSecrets) }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Create the namespace name of the pod monitor +*/}} +{{- define "prometheus-windows-exporter.podmonitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.podMonitor.namespace }} +{{- .Values.prometheus.podMonitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for podmonitor */}} +{{- define "podmonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/config.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/config.yaml new file mode 100644 index 0000000000..25f1fa69c2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/config.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }} + namespace: {{ include "prometheus-windows-exporter.namespace" . }} + labels: + {{- include "windowsExporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +data: + config.yml: | + {{- .Values.config | nindent 4 }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/daemonset.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/daemonset.yaml new file mode 100644 index 0000000000..be7feb3ed1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/daemonset.yaml @@ -0,0 +1,200 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }} + namespace: {{ include "prometheus-windows-exporter.namespace" . }} + labels: + {{- include "windowsExporter.labels" . | nindent 4 }} + {{- with .Values.daemonsetAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "windowsExporter.labels" . | nindent 6 }} + {{- with .Values.updateStrategy }} + updateStrategy: + {{- toYaml . | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "windowsExporter.labels" . 
| nindent 8 }} + spec: + automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + initContainers: + - name: configure-firewall + image: {{ include "prometheus-windows-exporter.image" . }} + command: + - C:\WINDOWS\System32\WindowsPowerShell\v1.0\powershell.exe + args: ["-f", "scripts/configure-firewall.ps1"] + volumeMounts: + - mountPath: /scripts + name: exporter-scripts + {{- with .Values.extraInitContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- /* Resolve the name through the same helper serviceaccount.yaml uses so both always agree */}} + serviceAccountName: {{ include "prometheus-windows-exporter.serviceAccountName" . }} + containers: + - name: windows-exporter + image: {{ include "prometheus-windows-exporter.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --config.file=%CONTAINER_SANDBOX_MOUNT_POINT%/config.yml + - --collector.textfile.directories=%CONTAINER_SANDBOX_MOUNT_POINT% + - --web.listen-address=:{{ .Values.service.port }} + {{- with .Values.extraArgs }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + hostPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + httpGet: + httpHeaders: + {{- range $_, $header := .Values.livenessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ .Values.service.port }} + scheme: {{ upper .Values.livenessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + httpGet: + httpHeaders: + {{- range $_, $header := .Values.readinessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ .Values.service.port }} + scheme: {{ upper .Values.readinessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: /config.yml + subPath: config.yml + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: true + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + {{- end }} + {{- with .Values.sidecars }} + {{- toYaml . | nindent 8 }} + {{- /* Inside this "with" the dot is the sidecars list, so chart values must be reached through the root context "$" */}} + {{- if or $.Values.sidecarVolumeMount $.Values.sidecarHostVolumeMounts }} + volumeMounts: + {{- range $_, $mount := $.Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- end }} + {{- range $_, $mount := $.Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- end }} + {{- end }} + {{- end }} + {{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "prometheus-windows-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }} + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} + hostPID: {{ .Values.hostPID }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: exporter-scripts + configMap: + name: {{ include "prometheus-windows-exporter.fullname" . 
}}-scripts + - name: config + configMap: + name: {{ include "prometheus-windows-exporter.fullname" . }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + emptyDir: + medium: Memory + {{- end }} + {{- range $_, $mount := .Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + configMap: + name: {{ $mount.name }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + secret: + secretName: {{ $mount.name }} + {{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/podmonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/podmonitor.yaml new file mode 100644 index 0000000000..bbb6c39340 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/podmonitor.yaml @@ -0,0 +1,91 @@ +{{- if .Values.prometheus.podMonitor.enabled }} +apiVersion: {{ .Values.prometheus.podMonitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: PodMonitor +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }} + namespace: {{ include "prometheus-windows-exporter.podmonitor-namespace" . }} + labels: + {{- include "windowsExporter.labels" . | nindent 4 }} + {{- with .Values.prometheus.podMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.podMonitor.jobLabel }} + {{- include "podmonitor.scrapeLimits" .Values.prometheus.podMonitor | nindent 2 }} + selector: + matchLabels: + {{- with .Values.prometheus.podMonitor.selectorOverride }} + {{- toYaml . 
| nindent 6 }} + {{- else }} + {{- /* Pods created by the DaemonSet only carry windowsExporter.labels, so select on those by default */}} + {{- include "windowsExporter.labels" . | nindent 6 }} + {{- end }} + namespaceSelector: + matchNames: + - {{ include "prometheus-windows-exporter.namespace" . }} + {{- with .Values.prometheus.podMonitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + podMetricsEndpoints: + {{- /* Must match the container port name declared in daemonset.yaml ("http"), not the Service port name */}} + - port: http + {{- with .Values.prometheus.podMonitor.scheme }} + scheme: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.path }} + path: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.authorization }} + authorization: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.oauth2 }} + oauth2: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.honorTimestamps }} + honorTimestamps: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.honorLabels }} + honorLabels: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . 
| nindent 8 }} + {{- end }} + enableHttp2: {{ default false .Values.prometheus.podMonitor.enableHttp2 }} + filterRunning: {{ default true .Values.prometheus.podMonitor.filterRunning }} + followRedirects: {{ default false .Values.prometheus.podMonitor.followRedirects }} + {{- with .Values.prometheus.podMonitor.params }} + params: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/scriptConfig.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/scriptConfig.yaml new file mode 100644 index 0000000000..f514c8161a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/scriptConfig.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }}-scripts + namespace: {{ include "prometheus-windows-exporter.namespace" . }} + labels: + {{- include "windowsExporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +data: +{{ (.Files.Glob "scripts/*").AsConfig | indent 2 }} + diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/service.yaml new file mode 100644 index 0000000000..267b796f63 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/service.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }} + namespace: {{ include "prometheus-windows-exporter.namespace" . }} + labels: + {{- include "windowsExporter.labels" $ | nindent 4 }} + {{- if or .Values.prometheus.monitor.enabled .Values.prometheus.podMonitor.enabled }} + {{- with .Values.service.annotations }} + annotations: + {{- unset . 
"prometheus.io/scrape" | toYaml | nindent 4 }} + {{- end }} + {{- else }} + annotations: + prometheus.io/scrape: "true" + {{- with .Values.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + {{- if ( and (eq .Values.service.type "NodePort" ) (not (empty .Values.service.nodePort)) ) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.port }} + protocol: TCP + appProtocol: http + name: {{ .Values.service.portName }} + selector: + {{- include "windowsExporter.labels" . | nindent 4 }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/serviceaccount.yaml new file mode 100644 index 0000000000..14c1c46807 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rbac.create .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "prometheus-windows-exporter.serviceAccountName" . }} + namespace: {{ include "prometheus-windows-exporter.namespace" . }} + labels: + {{- include "windowsExporter.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- if or .Values.serviceAccount.imagePullSecrets .Values.global.imagePullSecrets }} +imagePullSecrets: + {{- include "prometheus-windows-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.serviceAccount.imagePullSecrets) | indent 2 }} +{{- end }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/servicemonitor.yaml new file mode 100644 index 0000000000..2effc07758 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/templates/servicemonitor.yaml @@ -0,0 +1,75 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: {{ .Values.prometheus.monitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: ServiceMonitor +metadata: + name: {{ include "prometheus-windows-exporter.fullname" . }} + namespace: {{ include "prometheus-windows-exporter.monitor-namespace" . }} + labels: + {{- include "windowsExporter.labels" . | nindent 4 }} + {{- with .Values.prometheus.monitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | nindent 2 }} + {{- with .Values.prometheus.monitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- with .Values.prometheus.monitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "windowsExporter.labels" . | nindent 6 }} + {{- end }} + {{- with .Values.prometheus.monitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + endpoints: + - port: {{ .Values.service.portName }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- with .Values.prometheus.monitor.basicAuth }} + basicAuth: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + metricRelabelings: +{{- include "windowsExporter.renamedMetricsRelabeling" . | nindent 6 -}} + - sourceLabels: [__name__] + regex: 'wmi_(.*)' + replacement: 'windows_$1' + targetLabel: __name__ + - sourceLabels: [volume, nic] + regex: (.*);(.*) + separator: '' + targetLabel: device + action: replace + replacement: $1$2 + - sourceLabels: [__name__] + regex: windows_cs_logical_processors + replacement: 'system' + targetLabel: mode + relabelings: + - separator: ':' + sourceLabels: + - __meta_kubernetes_pod_host_ip + - __meta_kubernetes_pod_container_port_number + targetLabel: instance +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/values.yaml new file mode 100644 index 0000000000..04569505d6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/charts/windowsExporter/values.yaml @@ -0,0 +1,366 @@ +# Default values for prometheus-windows-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +image: + registry: docker.io + repository: rancher/mirrored-prometheus-windows-exporter + # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} + tag: "0.25.1" + pullPolicy: IfNotPresent + digest: "" + +config: |- + collectors: + enabled: '[defaults],tcp,memory,container' + +imagePullSecrets: [] +# - name: "image-pull-secret" +nameOverride: "" +fullnameOverride: "" + +global: + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + cattle: + systemDefaultRegistry: "" + +service: + type: ClusterIP + port: 9796 + nodePort: + portName: windows-metrics + annotations: {} + +# Additional environment variables that will be passed to the daemonset +env: {} +## env: +## VARIABLE: value + +prometheus: + monitor: + enabled: true + additionalLabels: {} + namespace: "" + + jobLabel: "component" + + # List of pod labels to add to windows exporter metrics + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor + podTargetLabels: ["component"] + + scheme: http + basicAuth: {} + bearerTokenFile: + tlsConfig: {} + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## Override serviceMonitor selector + ## + selectorOverride: {} + + ## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. 
+ ## + attachMetadata: + node: false + + relabelings: [] + metricRelabelings: [] + interval: "" + scrapeTimeout: 10s + ## prometheus.monitor.apiVersion ApiVersion for the serviceMonitor resource (defaults to "monitoring.coreos.com/v1") + apiVersion: "" + + ## SampleLimit defines a per-scrape limit on the number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on the length of label names that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on the length of label values that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + # PodMonitor defines monitoring for a set of pods. + # ref. https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor + # Using a PodMonitor may be preferred in some environments where there is a very large number + # of Windows Exporter endpoints (1000+) behind a single service. + # The PodMonitor is disabled by default. When switching from ServiceMonitor to PodMonitor, + # the time series resulting from the configuration through PodMonitor may have different labels. + # For instance, there will not be the service label any longer which might + # affect PromQL queries selecting that label. + podMonitor: + enabled: false + # Namespace in which to deploy the pod monitor. Defaults to the release namespace. + namespace: "" + # Additional labels, e.g. setting a label for pod monitor selector as set in prometheus + additionalLabels: {} + # release: kube-prometheus-stack + # PodTargetLabels transfers labels of the Kubernetes Pod onto the target. 
+ podTargetLabels: [] + # apiVersion defaults to monitoring.coreos.com/v1. + apiVersion: "" + # Override pod selector to select pod objects. + selectorOverride: {} + # Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + attachMetadata: + node: false + # The label to use to retrieve the job name from. Defaults to label app.kubernetes.io/name. + jobLabel: "" + + # Scheme/protocol to use for scraping. + scheme: "http" + # Path to scrape metrics at. + path: "/metrics" + + # BasicAuth allows an endpoint to authenticate over basic authentication. + # More info: https://prometheus.io/docs/operating/configuration/#endpoint + basicAuth: {} + # Secret to mount to read bearer token for scraping targets. + # The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator. + # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#secretkeyselector-v1-core + bearerTokenSecret: {} + # TLS configuration to use when scraping the endpoint. + tlsConfig: {} + # Authorization section for this endpoint. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.SafeAuthorization + authorization: {} + # OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.OAuth2 + oauth2: {} + + # ProxyURL, e.g. http://proxyserver:2195. Directs scrapes through proxy to this endpoint. + proxyUrl: "" + # Interval at which endpoints should be scraped. If not specified, Prometheus’ global scrape interval is used. + interval: "" + # Timeout after which the scrape is ended. If not specified, the Prometheus global scrape timeout is used. + scrapeTimeout: "" + # HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data. 
+ honorTimestamps: true + # HonorLabels chooses the metric’s labels on collisions with target labels. + honorLabels: true + # Whether to enable HTTP2. Default false. + enableHttp2: "" + # Drop pods that are not running (Failed, Succeeded). + # Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + filterRunning: "" + # FollowRedirects configures whether scrape requests follow HTTP 3xx redirects. Default false. + followRedirects: "" + # Optional HTTP URL parameters + params: {} + + # RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds + # relabelings for a few standard Kubernetes fields. The original scrape job’s name + # is available via the __tmp_prometheus_job_name label. + # More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + relabelings: [] + # MetricRelabelConfigs to apply to samples before ingestion. + metricRelabelings: [] + + # SampleLimit defines a per-scrape limit on the number of scraped samples that will be accepted. + sampleLimit: 0 + # TargetLimit defines a limit on the number of scraped targets that will be accepted. + targetLimit: 0 + # Per-scrape limit on number of labels that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelLimit: 0 + # Per-scrape limit on the length of label names that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelNameLengthLimit: 0 + # Per-scrape limit on the length of label values that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelValueLengthLimit: 0 + +## Customize the updateStrategy if set +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 200m + # memory: 50Mi + # requests: + # cpu: 100m +# memory: 30Mi + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + imagePullSecrets: [] + automountServiceAccountToken: false + +securityContext: + windowsOptions: + hostProcess: true + runAsUserName: "NT AUTHORITY\\system" + +rbac: + ## If true, create & use RBAC resources + ## + create: true + +# Expose the service to the host network +hostNetwork: true + +# Share the host process ID namespace +hostPID: true + +## Assign a group of affinity scheduling rules +## +affinity: {} +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchFields: +# - key: metadata.name +# operator: In +# values: +# - target-host-name + +# Annotations to be added to windows exporter pods +podAnnotations: + # Fix for very slow GKE cluster upgrades + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + +# Extra labels to be added to windows exporter pods +podLabels: {} + +# Annotations to be added to windows exporter daemonset +daemonsetAnnotations: {} + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +# Custom DNS configuration to be added to prometheus-windows-exporter pods +dnsConfig: {} +# nameservers: +# - 1.2.3.4 +# searches: +# - ns1.svc.cluster-domain.example +# - my.dns.search.suffix +# options: +# - name: ndots +# value: "2" +# - name: edns0 + +## Assign a nodeSelector if operating a hybrid cluster +## +nodeSelector: + kubernetes.io/os: 
windows + # kubernetes.io/arch: amd64 + +tolerations: + - effect: NoSchedule + operator: Exists + +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +## Additional container arguments +## +extraArgs: [] +# - --collector.service.services-where +# - "Name LIKE 'sql%'" + +## Additional mounts from the host to windows-exporter container +## +extraHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false + +## Additional configmaps to be mounted. +## +configmaps: [] +# - name: +# mountPath: +secrets: [] +# - name: +# mountPath: +## Override the deployment namespace +## +namespaceOverride: "" + +## Additional containers for export metrics to text file +## +sidecars: [] +## - name: nvidia-dcgm-exporter +## image: nvidia/dcgm-exporter:1.4.3 + +## Volume for sidecar containers +## +sidecarVolumeMount: [] +## - name: collector-textfiles +## mountPath: /run/prometheus +## readOnly: false + +## Additional mounts from the host to sidecar containers +## +sidecarHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +## Liveness probe +## +livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +## Readiness probe +## +readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/nginx.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/nginx.json new file mode 100644 index 0000000000..565352235a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/nginx.json @@ -0,0 +1,1445 @@ +{ + "__inputs": [ + + ], + 
"__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "sum(changes(nginx_ingress_controller_config_last_reload_successful_timestamp_seconds{instance!=\"unknown\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[30s])) by (controller_class)", + "hide": false, + "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "Config Reloads", + "showIn": 0, + "step": "30s", + "tagKeys": "controller_class", + "tags": [], + "titleFormat": "Config Reloaded", + "type": "tags" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1534359654832, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "format": "ops", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m])), 0.001)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Controller Request Volume", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 82, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",state=\"active\"}[2m]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Controller Connections", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": 
"=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 80, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 21, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",status!~\"[4-5].*\"}[2m])) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "thresholds": "95, 99, 99.5", + "title": "Controller Success Rate (non-4|5xx responses)", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": 
"$datasource", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 81, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(irate(nginx_ingress_controller_success{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[1m])) * 60", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Config Reloads", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "total" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 83, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + 
"maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(nginx_ingress_controller_config_last_reload_successful{controller_pod=~\"$controller\",controller_namespace=~\"$namespace\"} == 0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Last Config Failed", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 3 + }, + "height": "200px", + "id": 86, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"round(sum(irate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress), 0.001)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "metric": "network", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Ingress Request Volume", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00", + "max - prometheus": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": false, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 87, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\",status!~\"[4-5].*\"}[2m])) by (ingress) / sum(rate(nginx_ingress_controller_requests{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Ingress Success Rate (non-4|5xx responses)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 10 + }, + "height": "200px", + "id": 32, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (nginx_ingress_controller_request_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "- sum (irate (nginx_ingress_controller_response_size_sum{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))", + "format": "time_series", + "hide": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00", + "max - prometheus": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": false, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 77, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": 
"current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(nginx_ingress_controller_nginx_process_resident_memory_bytes{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}) ", + "format": "time_series", + "instant": false, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "nginx", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max - istio-proxy": "#890f02", + "max - master": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 3, + "editable": false, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 10 + }, + "height": "", + "id": 79, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (rate (nginx_ingress_controller_nginx_process_cpu_seconds_total{controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m])) ", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "nginx", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Average CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "hideTimeOverride": false, + "id": 75, + "links": [], + "pageSize": 7, + "repeat": null, + "repeatDirection": "h", + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "styles": [ + { + "alias": "Ingress", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ingress", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + }, + 
{ + "alias": "Requests", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #A", + "thresholds": [ + "" + ], + "type": "number", + "unit": "ops" + }, + { + "alias": "Errors", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #B", + "thresholds": [], + "type": "number", + "unit": "ops" + }, + { + "alias": "P50 Latency", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "P90 Latency", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value #D", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "P99 Latency", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value #E", + "thresholds": [], + "type": "number", + "unit": "dtdurations" + }, + { + "alias": "IN", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value #F", + "thresholds": [ + "" + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD 
HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "OUT", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [], + "type": "number", + "unit": "Bps" + } + ], + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "D" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (le, ingress))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ destination_service }}", + "refId": "E" + }, + { + "expr": "sum(irate(nginx_ingress_controller_request_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "table", + "hide": false, + "instant": true, + "interval": 
"", + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "F" + }, + { + "expr": "sum(irate(nginx_ingress_controller_response_size_sum{ingress!=\"\",controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",ingress=~\"$ingress\"}[2m])) by (ingress)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ ingress }}", + "refId": "G" + } + ], + "timeFrom": null, + "title": "Ingress Percentile Response Times and Transfer Rates", + "transform": "table", + "transparent": false, + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "$datasource", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "height": "1024", + "id": 85, + "links": [], + "pageSize": 7, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "TTL", + "colorMode": "cell", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [ + "0", + "691200" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "avg(nginx_ingress_controller_ssl_expire_time_seconds{kubernetes_pod_name=~\"$controller\",namespace=~\"$namespace\",ingress=~\"$ingress\"}) by (host) - time()", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ host }}", + "metric": "gke_letsencrypt_cert_expiration", + "refId": "A", + "step": 1 + } + ], + "title": "Ingress 
Certificate Expiry", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "refresh": "5s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "nginx" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(nginx_ingress_controller_config_hash, controller_namespace)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Controller Class", + "multi": false, + "name": "controller_class", + "options": [], + "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\"}, controller_class) ", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Controller", + "multi": false, + "name": "controller", + "options": [], + "query": "label_values(nginx_ingress_controller_config_hash{namespace=~\"$namespace\",controller_class=~\"$controller_class\"}, controller_pod) ", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "tags": [], + "text": 
"All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Ingress", + "multi": false, + "name": "ingress", + "options": [], + "query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller_pod=~\"$controller\"}, ingress) ", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "2m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "NGINX / Ingress Controller", + "uid": "nginx", + "version": 1 +} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/request-handling-performance.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/request-handling-performance.json new file mode 100644 index 0000000000..156e33123d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/ingress-nginx/request-handling-performance.json @@ -0,0 +1,963 @@ +{ + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": 9614, + "graphTooltip": 1, + "id": null, + "iteration": 1582146566338, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total time taken for nginx and upstream servers to process a request and send a response", + "fill": 1, + 
"fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 91, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "legendFormat": ".5", + "refId": "D" + }, + { + "expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "legendFormat": ".95", + "refId": "B" + }, + { + "expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_request_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "legendFormat": ".99", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total request handling time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": 
"The time spent on receiving the response from the upstream server", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 94, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n 0.5,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": ".5", + "refId": "D" + }, + { + "expr": "histogram_quantile(\n 0.95,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "legendFormat": ".95", + "refId": "B" + }, + { + "expr": "histogram_quantile(\n 0.99,\n sum by (le)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "legendFormat": ".99", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Upstream response time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 93, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " sum by (path)(\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request volume by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For each path observed, its median upstream response time", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + 
"rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(\n .5,\n sum by (le, path)(\n rate(\n nginx_ingress_controller_response_duration_seconds_bucket{\n ingress =~ \"$ingress\"\n }[1m]\n )\n )\n)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Median upstream response time by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Percentage of 4xx and 5xx responses among all responses.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 100, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n}[1m]))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Response error rate by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For each path observed, the sum of upstream request time", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 102, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress =~ \"$ingress\"}[1m]))", + "interval": 
"", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Upstream time consumed by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 101, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " sum (\n rate(\n nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~\"[4-5].*\",\n }[1m]\n )\n ) by(path, status)\n", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }} {{ status }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Response error volume by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + 
"show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 99, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (\n rate (\n nginx_ingress_controller_response_size_sum {\n ingress =~ \"$ingress\",\n }[1m]\n )\n) by (path) / sum (\n rate(\n nginx_ingress_controller_response_size_count {\n ingress =~ \"$ingress\",\n }[1m]\n )\n) by (path)\n", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ path }}", + "refId": "D" + }, + { + "expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n", + "hide": true, + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average response size by Path", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, 
+ "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_sum {\n ingress =~ \"$ingress\",\n }[1m]\n)) / sum (\n rate(\n nginx_ingress_controller_ingress_upstream_latency_seconds_count {\n ingress =~ \"$ingress\",\n }[1m]\n )\n)\n", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "average", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Upstream service latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 22, + "style": "dark", + "tags": [ + 
"nginx" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "$datasource", + "definition": "label_values(nginx_ingress_controller_requests, ingress) ", + "hide": 0, + "includeAll": true, + "label": "Service Ingress", + "multi": false, + "name": "ingress", + "options": [], + "query": "label_values(nginx_ingress_controller_requests, ingress) ", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "2m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "NGINX / Request Handling Performance", + "uid": "4GFbkOsZk", + "version": 1 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster-nodes.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster-nodes.json new file mode 100644 index 0000000000..1d4943501b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster-nodes.json @@ -0,0 +1,793 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "links": [], + "panels": [ + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": 
false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\",mode=\"idle\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m] ({{instance}})" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 
3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load5 OR avg_over_time(windows_system_processor_queue_length[5m])) by (instance)", + "interval": "", + "legendFormat": "Load[5m] ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(node_load1 OR avg_over_time(windows_system_processor_queue_length[1m])) by (instance)", + "interval": "", + "legendFormat": "Load[1m] ({{instance}})", + "refId": "B" + }, + { + "expr": "sum(node_load15 OR avg_over_time(windows_system_processor_queue_length[15m])) by (instance)", + "interval": "", + "legendFormat": "Load[15m] ({{instance}})", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(node_memory_MemAvailable_bytes OR windows_os_physical_memory_free_bytes) by (instance) / sum(node_memory_MemTotal_bytes OR windows_cs_physical_memory_bytes) by (instance) ", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": 
false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"} OR windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) by (instance) / sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"} OR windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) by (instance))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_disk_read_bytes_total[$__rate_interval]) OR rate(windows_logical_disk_read_bytes_total[$__rate_interval])) by 
(instance)", + "interval": "", + "legendFormat": "Read ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_disk_written_bytes_total[$__rate_interval]) OR rate(windows_logical_disk_write_bytes_total[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Write ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by 
(instance)", + "interval": "", + "legendFormat": "Receive Errors ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Receive Total ({{instance}})", + "refId": "B" + }, + { + "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Transmit Errors ({{instance}})", + "refId": "C" + }, + { + "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Receive Dropped ({{instance}})", + "refId": "D" + }, + { + "expr": "sum(rate(node_network_transmit_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_outbound_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Transmit Dropped ({{instance}})", + "refId": "E" + }, + { + "expr": "sum(rate(node_network_transmit_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Transmit Total ({{instance}})", + "refId": "F" + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Transmit Total ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by 
(instance)", + "interval": "", + "legendFormat": "Receive Total ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Cluster (Nodes)", + "uid": "rancher-cluster-nodes-1", + "version": 3 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster.json new file mode 100644 index 0000000000..24385a237a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/cluster/rancher-cluster.json @@ -0,0 +1,776 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": 
null, + "graphTooltip": 0, + "id": 28, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\",mode=\"idle\"}[$__rate_interval]))", + "legendFormat": "Total", + "interval": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + 
}, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load5 OR avg_over_time(windows_system_processor_queue_length[5m]))", + "interval": "", + "legendFormat": "Load[5m]", + "refId": "A" + }, + { + "expr": "sum(node_load1 OR avg_over_time(windows_system_processor_queue_length[1m]))", + "interval": "", + "legendFormat": "Load[1m]", + "refId": "B" + }, + { + "expr": "sum(node_load15 OR avg_over_time(windows_system_processor_queue_length[15m]))", + "interval": "", + "legendFormat": "Load[15m]", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": 
false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(node_memory_MemAvailable_bytes OR windows_os_physical_memory_free_bytes) / sum(node_memory_MemTotal_bytes OR windows_cs_physical_memory_bytes)", + "legendFormat": "Total", + "interval": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"} OR 
windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) / sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"} OR windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}))", + "legendFormat": "Total", + "interval": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_disk_read_bytes_total[$__rate_interval]) OR rate(windows_logical_disk_read_bytes_total[$__rate_interval]))", + "interval": "", + "legendFormat": "Read", + "refId": "A" + }, + { + "expr": "sum(rate(node_disk_written_bytes_total[$__rate_interval]) OR rate(windows_logical_disk_write_bytes_total[$__rate_interval]))", + "interval": "", 
+ "legendFormat": "Write", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Errors", + "refId": "A" + }, + { + "expr": "(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + 
(sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "B" + }, + { + "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Errors", + "refId": "C" + }, + { + "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Dropped", + "refId": "D" + }, + { + "expr": "(sum(rate(node_network_transmit_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Dropped", + "refId": "E" + }, + { + "expr": "(sum(rate(node_network_transmit_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": 
true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Cluster", + "uid": "rancher-cluster-1", + "version": 3 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundle.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundle.json new file mode 100644 index 0000000000..698f48aeed --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundle.json @@ -0,0 +1,246 @@ +{ + "description": "Bundle", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Bundles", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- 
Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_not_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_out_of_sync{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Out of Sync" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_err_applied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Err Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_pending{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Pending" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_wait_applied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Bundles", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": 
null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_not_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_out_of_sync{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Out of Sync" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_err_applied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Err Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_pending{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Pending" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundle_wait_applied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Bundles", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(fleet_bundle_desired_ready, 
exported_namespace)", + "refresh": 2, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "includeAll": true, + "name": "name", + "query": "label_values(fleet_bundle_desired_ready{exported_namespace=~\"$namespace\"}, name)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / Bundle", + "uid": "fleet-bundle" +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundledeployment.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundledeployment.json new file mode 100644 index 0000000000..c81f7a6212 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/bundledeployment.json @@ -0,0 +1,219 @@ +{ + "description": "BundleDeployment", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Ready\"}) / sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\"})" + } + ], + "title": "Ready BundleDeployments", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Ready\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"NotReady\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"WaitApplied\"})", + "legendFormat": "Wait Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"ErrApplied\"})", + "legendFormat": "Err Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"OutOfSync\"})", + "legendFormat": "OutOfSync" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Pending\"})", + "legendFormat": "Pending" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Modified\"})", + "legendFormat": "Modified" + } + ], + "title": "BundleDeployments", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Ready\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"NotReady\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"WaitApplied\"})", + "legendFormat": "Wait Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"ErrApplied\"})", + "legendFormat": "Err Applied" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"OutOfSync\"})", + "legendFormat": "OutOfSync" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Pending\"})", + "legendFormat": "Pending" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_bundledeployment_state{cluster_namespace=~\"$namespace\",state=\"Modified\"})", + "legendFormat": "Modified" + } + ], + "title": "BundleDeployments", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(fleet_bundledeployment_state, cluster_namespace)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / BundleDeployment", + "uid": "fleet-bundledeployment" +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/cluster.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/cluster.json new file mode 100644 index 0000000000..73bdea4834 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/cluster.json @@ -0,0 +1,484 @@ +{ + "description": "Cluster", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + 
"defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_cluster_desired_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Git Repos", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_desired_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Git Repos", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_desired_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_ready_git_repos{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Git Repos", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 13 + }, + "id": 4, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_cluster_resources_count_desiredready{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 13 + }, + "id": 5, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_desiredready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_notready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_orphaned{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Orphaned" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_waitapplied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 6, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_desiredready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_notready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { 
+ "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_orphaned{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Orphaned" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_resources_count_waitapplied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Resources", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 26 + }, + "id": 7, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"Ready\"}) / sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 26 + }, + "id": 8, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"Ready\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"NotReady\"})", + "legendFormat": "Not Ready" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"WaitCheckIn\"})", + "legendFormat": "Wait Check In" + } + ], + "title": "Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 9, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"Ready\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"NotReady\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_state{exported_namespace=\"$namespace\",name=~\"$name\",state=\"WaitCheckIn\"})", + "legendFormat": "Wait Check In" + } + ], + "title": "Clusters", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(fleet_cluster_desired_ready_git_repos, exported_namespace)", + "refresh": 2, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "includeAll": true, + "name": "name", + "query": "label_values(fleet_cluster_desired_ready_git_repos{exported_namespace=~\"$namespace\"}, name)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / Cluster", + "uid": "fleet-cluster" +} diff --git 
a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/clustergroup.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/clustergroup.json new file mode 100644 index 0000000000..ce3df87b21 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/clustergroup.json @@ -0,0 +1,468 @@ +{ + "description": "ClusterGroup", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_cluster_group_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Bundles", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Bundles", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + 
"pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_bundle_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_bundle_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Bundles", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 13 + }, + "id": 4, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "(sum(fleet_cluster_group_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"}) - sum(fleet_cluster_group_non_ready_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})) / sum(fleet_cluster_group_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 13 + }, + "id": 5, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Total" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_non_ready_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Non Ready" + } + ], + "title": "Clusters", + "type": "stat" + }, + { + "datasource": { + 
"type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 6, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Total" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_non_ready_cluster_count{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Non Ready" + } + ], + "title": "Clusters", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 26 + }, + "id": 7, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_cluster_group_resource_count_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 26 + }, + "id": 8, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": 
"sum(fleet_cluster_group_resource_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_notready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_orphaned{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Orphaned" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_waitapplied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 9, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + 
}, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_notready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_orphaned{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Orphaned" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_cluster_group_resource_count_waitapplied{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Wait Applied" + } + ], + "title": "Resources", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(fleet_cluster_group_bundle_desired_ready, exported_namespace)", + "refresh": 2, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "includeAll": true, + "name": "name", + 
"query": "label_values(fleet_cluster_group_bundle_desired_ready{exported_namespace=~\"$namespace\"}, name)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / ClusterGroup", + "uid": "fleet-cluster-group" +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/controller-runtime.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/controller-runtime.json new file mode 100644 index 0000000000..23a81f2a8c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/controller-runtime.json @@ -0,0 +1,454 @@ +{ + "description": "Controller Runtime", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "controller_runtime_active_workers{job=\"$job\", namespace=\"$namespace\"}", + "legendFormat": "{{controller}} {{instance}}" + } + ], + "title": "Number of Workers in Use", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(controller_runtime_reconcile_errors_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, pod)", + "legendFormat": "{{instance}} {{pod}}" + } + ], + "title": "Reconciliation Error Count per Controller", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + 
"defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 3, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(controller_runtime_reconcile_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, pod)", + "legendFormat": "{{instance}} {{pod}}" + } + ], + "title": "Total Reconciliation Count per Controller", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 4, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "workqueue_depth{job=\"$job\", namespace=\"$namespace\"}", + "legendFormat": "{{instance}} {{pod}}" + } + ], + "title": "WorkQueue Depth", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 5, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.50, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P50 {{name}}" + } + ], + "title": "Seconds for Items Stay in Queue (before being requested) P50", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 6, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" 
+ }, + "expr": "histogram_quantile(0.90, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P90 {{name}}" + } + ], + "title": "Seconds for Items Stay in Queue (before being requested) P90", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 7, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P99 {{name}}" + } + ], + "title": "Seconds for Items Stay in Queue (before being requested) P99", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 8, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(workqueue_adds_total{job=\"$job\", namespace=\"$namespace\"}[2m])) by (instance, name)", + "legendFormat": "{{name}} {{instance}}" + } + ], + "title": "Work Queue Add Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 64 + }, + "id": 9, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(workqueue_unfinished_work_seconds{job=\"$job\", namespace=\"$namespace\"}[5m])", + "legendFormat": "{{name}} 
{{instance}}" + } + ], + "title": "Unfinished Seconds", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 10, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.50, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P50 {{name}}" + } + ], + "title": "Seconds Processing Items from WorkQueue - 50th Percentile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 80 + }, + "id": 11, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.90, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P90 {{name}}" + } + ], + "title": "Seconds Processing Items from WorkQueue - 90th Percentile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 88 + }, + "id": 12, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_work_duration_seconds_bucket{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name, le))", + "legendFormat": "P99 {{name}}" + } + ], + "title": "Seconds Processing Items from WorkQueue - 99th Percentile", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": null, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 13, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(workqueue_retries_total{job=\"$job\", namespace=\"$namespace\"}[5m])) by (instance, name)", + "legendFormat": "{{name}} {{instance}}" + } + ], + "title": "Work Queue Retries Rate", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(controller_runtime_reconcile_total, namespace)", + "refresh": 2, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "job", + "query": "label_values(controller_runtime_reconcile_total{namespace=~\"$namespace\"}, job)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / Controller-Runtime", + "uid": "fleet-controller-runtime" +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/gitrepo.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/gitrepo.json new file mode 100644 index 0000000000..1a50c2937d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/fleet/gitrepo.json @@ -0,0 +1,325 @@ +{ + "description": "GitRepo", + "graphTooltip": 1, + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, 
+ "expr": "sum(fleet_gitrepo_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_gitrepo_desired_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_desired_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_desired_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_ready_clusters{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + } + ], + "title": "Clusters", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 0, + "y": 13 + }, + "id": 4, + "pluginVersion": "v11.0.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_ready{exported_namespace=\"$namespace\",name=~\"$name\"}) / sum(fleet_gitrepo_resources_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})" + } + ], + "title": "Ready Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 5, + "w": 17, + "x": 7, + "y": 13 + }, + "id": 5, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_not_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + } + ], + "title": "Resources", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + 
"fieldConfig": { + "defaults": { + "decimals": 0, + "unit": null + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 6, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_desired_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Desired Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_not_ready{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Not Ready" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_missing{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Missing" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_modified{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Modified" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(fleet_gitrepo_resources_unknown{exported_namespace=\"$namespace\",name=~\"$name\"})", + "legendFormat": "Unknown" + } + ], + "title": "Resources", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "name": "namespace", + "query": "label_values(fleet_gitrepo_desired_ready_clusters, exported_namespace)", + "refresh": 2, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "includeAll": true, + "name": "name", + "query": "label_values(fleet_gitrepo_desired_ready_clusters{exported_namespace=~\"$namespace\"}, name)", 
+ "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Fleet / GitRepo", + "uid": "fleet-gitrepo" +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/home/rancher-default-home.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/home/rancher-default-home.json new file mode 100644 index 0000000000..3fce207561 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/home/rancher-default-home.json @@ -0,0 +1,1290 @@ +{ + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "", + "type": "welcome" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 4 + }, + "height": "180px", + "id": 6, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 
189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(1 - (avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\",mode=\"idle\"}[5m])))) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "CPU Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 4 + }, + "height": "180px", + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(1 - sum({__name__=~\"node_memory_MemAvailable_bytes|windows_os_physical_memory_free_bytes\"}) / sum({__name__=~\"node_memory_MemTotal_bytes|windows_cs_physical_memory_bytes\"})) * 100", + "format": "time_series", + "interval": "10s", + 
"intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Memory Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 4 + }, + "height": "180px", + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(1 - (((sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + (sum(windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) OR on() vector(0))) / ((sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + (sum(windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) OR on() vector(0))))) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 
+ } + ], + "thresholds": "65, 90", + "title": "Disk Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 9 + }, + "height": "1px", + "id": 11, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\",mode!=\"idle\"}[5m]))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "CPU Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 9 + }, + "height": "1px", + "id": 12, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kube_node_status_allocatable_cpu_cores{}) OR sum(kube_node_status_allocatable{resource=\"cpu\",unit=\"core\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "CPU Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + 
"y": 9 + }, + "height": "1px", + "id": 9, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum({__name__=~\"node_memory_MemTotal_bytes|windows_cs_physical_memory_bytes\"}) - sum({__name__=~\"node_memory_MemAvailable_bytes|windows_os_physical_memory_free_bytes\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "Memory Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 9 + }, + "height": "1px", + "id": 10, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + 
"postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kube_node_status_allocatable_memory_bytes{}) OR sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "Memory Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 9 + }, + "height": "1px", + "id": 13, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"(sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"}) - sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + (sum(windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) - sum(windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) OR on() vector(0))", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "Disk Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 9 + }, + "height": "1px", + "id": 14, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + 
(sum(windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) OR on() vector(0))", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "", + "title": "Disk Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 2051, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\",mode=\"idle\"}[$__rate_interval])))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Cluster", + "refId": "A" + }, + { + "expr": "1 - avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\", mode=\"idle\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 12 + }, + "hiddenSeries": false, + "id": 2052, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 * (1 - sum({__name__=~\"node_memory_MemAvailable_bytes|windows_os_physical_memory_free_bytes\"}) / sum({__name__=~\"node_memory_MemTotal_bytes|windows_cs_physical_memory_bytes\"}))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Cluster", + "refId": "A" + }, + { + "expr": "100 * (1- sum({__name__=~\"node_memory_MemAvailable_bytes|windows_os_physical_memory_free_bytes\"}) by (instance) / sum({__name__=~\"node_memory_MemTotal_bytes|windows_cs_physical_memory_bytes\"}) by (instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 12 + }, + "hiddenSeries": false, + "id": 2053, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(1 - ((sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + (sum(windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"} OR on() vector(0)))) / ((sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"}) OR on() vector(0)) + (sum(windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) OR on() vector(0)))) * 100", + "legendFormat": "Cluster", + "refId": "A" + }, + { + "expr": "(1 - (sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\"}) by (instance)) / sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\"}) by (instance)) * 100", + "hide": false, + "legendFormat": "{{ instance }}", + "refId": "B" + }, + { + "expr": "(1 - 
(sum(windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) by (instance)) / sum(windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\"}) by (instance)) * 100", + "hide": false, + "legendFormat": "{{ instance }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "folderId": 0, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 18 + }, + "headings": true, + "id": 3, + "limit": 30, + "links": [], + "query": "", + "recent": true, + "search": true, + "starred": false, + "tags": [], + "title": "Dashboards", + "type": "dashlist" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 2055, + "options": { + "content": "## About Rancher Monitoring\n\nRancher Monitoring is a Helm chart developed by Rancher that is powered by [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). 
It is based on the upstream [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart maintained by the Prometheus community.\n\nBy default, the chart deploys Grafana alongside a set of Grafana dashboards curated by the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project.\n\nFor more information on how Rancher Monitoring differs from [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack), please view the CHANGELOG.md of the rancher-monitoring chart located in the [rancher/charts](https://github.com/rancher/charts) repository.\n\nFor more information about how to configure Rancher Monitoring, please view the [Rancher docs](https://rancher.com/docs/rancher/v2.x/en/).\n\n", + "mode": "markdown" + }, + "pluginVersion": "7.1.0", + "timeFrom": null, + "timeShift": null, + "title": "", + "type": "text" + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "timezone": "browser", + "title": "Home", + "uid": "rancher-home-1", + "version": 5 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd-nodes.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd-nodes.json new file mode 100644 index 0000000000..8af4b81ce0 --- /dev/null +++ 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd-nodes.json @@ -0,0 +1,687 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 32, + "links": [], + "panels": [ + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_network_client_grpc_received_bytes_total{job=\"kube-etcd\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Client Traffic In ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(etcd_network_client_grpc_sent_bytes_total{job=\"kube-etcd\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Client Traffic Out ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GRPC Client Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + 
"format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]({{instance}})" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(etcd_mvcc_db_total_size_in_bytes) by (instance)", + "interval": "", + "legendFormat": "DB Size ({{instance}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "DB Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) by (instance) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) by (instance)", + "interval": "", + "legendFormat": "Watch Streams ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) by (instance) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) by (instance)", + "interval": "", + "legendFormat": "Lease Watch Stream ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Active Streams", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": 
"#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_server_proposals_committed_total[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Proposal Committed ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(etcd_server_proposals_applied_total[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Proposal Applied ({{instance}})", + "refId": "B" + }, + { + "expr": "sum(rate(etcd_server_proposals_failed_total[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "Proposal Failed ({{instance}})", + "refId": "C" + }, + { + "expr": "sum(etcd_server_proposals_pending) by (instance)", + "interval": "", + "legendFormat": "Proposal Pending ({{instance}})", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Raft Proposals", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "RPC Rate ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "RPC Failure Rate ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, 
+ "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[$__rate_interval])) by (instance, le))", + "interval": "", + "legendFormat": "WAL fsync ({{instance}})", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[$__rate_interval])) by (instance, le))", + "interval": "", + "legendFormat": "DB fsync ({{instance}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Sync Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 2, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + 
"refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / etcd (Nodes)", + "uid": "rancher-etcd-nodes-1", + "version": 5 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd.json new file mode 100644 index 0000000000..0c058cafb9 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-etcd.json @@ -0,0 +1,669 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 33, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_network_client_grpc_received_bytes_total{job=\"kube-etcd\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Client Traffic In", + "refId": "A" + }, + { + "expr": 
"sum(rate(etcd_network_client_grpc_sent_bytes_total{job=\"kube-etcd\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Client Traffic Out", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GRPC Client Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(etcd_mvcc_db_total_size_in_bytes)", + "interval": "", + "legendFormat": "DB Size", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "DB Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, 
+ "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})", + "interval": "", + "legendFormat": "Watch Streams", + "refId": "A" + }, + { + "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})", + "interval": "", + "legendFormat": "Lease Watch Stream", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Active Streams", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, 
+ "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_server_proposals_committed_total[$__rate_interval]))", + "interval": "", + "legendFormat": "Proposal Committed", + "refId": "A" + }, + { + "expr": "sum(rate(etcd_server_proposals_applied_total[$__rate_interval]))", + "interval": "", + "legendFormat": "Proposal Applied", + "refId": "B" + }, + { + "expr": "sum(rate(etcd_server_proposals_failed_total[$__rate_interval]))", + "interval": "", + "legendFormat": "Proposal Failed", + "refId": "C" + }, + { + "expr": "sum(etcd_server_proposals_pending)", + "interval": "", + "legendFormat": "Proposal Pending", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Raft Proposals", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "RPC Rate", + "refId": "A" + }, + { + "expr": "sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "RPC Failure Rate", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[$__rate_interval])) by (instance, le))", + "interval": "", + "legendFormat": "WAL fsync", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[$__rate_interval])) by (instance, le))", + "interval": "", + "legendFormat": "DB fsync", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Sync Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 2, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, 
+ "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / etcd", + "uid": "rancher-etcd-1", + "version": 4 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components-nodes.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components-nodes.json new file mode 100644 index 0000000000..b31358eaaf --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components-nodes.json @@ -0,0 +1,527 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 30, + "links": [], + "panels": [ + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_total[$__rate_interval])) by (instance, code)", + "interval": "", + "legendFormat": "{{code}}({{instance}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "API Server Request Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]({{instance}})" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"deployment\"}) by (instance, name)", + "interval": "", + "legendFormat": "Deployment Depth ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"volumes\"}) by (instance, name)", + "interval": "", + "legendFormat": "Volumes Depth ({{instance}})", + "refId": "B" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"replicaset\"}) by (instance, name)", + 
"interval": "", + "legendFormat": "ReplicaSet Depth ({{instance}})", + "refId": "C" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"service\"}) by (instance, name)", + "interval": "", + "legendFormat": "Service Depth ({{instance}})", + "refId": "D" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"serviceaccount\"}) by (instance, name)", + "interval": "", + "legendFormat": "ServiceAccount Depth ({{instance}})", + "refId": "E" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"endpoint\"}) by (instance, name)", + "interval": "", + "legendFormat": "Endpoint Depth ({{instance}})", + "refId": "F" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"daemonset\"}) by (instance, name)", + "interval": "", + "legendFormat": "DaemonSet Depth ({{instance}})", + "refId": "G" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"statefulset\"}) by (instance, name)", + "interval": "", + "legendFormat": "StatefulSet Depth ({{instance}})", + "refId": "H" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"replicationmanager\"}) by (instance, name)", + "interval": "", + "legendFormat": "ReplicationManager Depth ({{instance}})", + "refId": "I" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Controller Manager Queue Depth", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, 
+ { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_status_scheduled{condition=\"false\"})", + "interval": "", + "legendFormat": "Failed To Schedule", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod Scheduling Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{instance}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"reading\"}) by (instance)", + "interval": "", + "legendFormat": "Reading ({{instance}})", + "refId": "A" + }, + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"waiting\"}) by (instance)", + "interval": "", + "legendFormat": "Waiting ({{instance}})", + "refId": "B" + }, + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"writing\"}) by (instance)", + "interval": "", + "legendFormat": "Writing ({{instance}})", + "refId": "C" + }, + { + "expr": "sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state=\"accepted\"}[$__rate_interval]))) by (instance)", + "interval": "", + "legendFormat": "Accepted ({{instance}})", + "refId": "D" + }, + { + "expr": "sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state=\"handled\"}[$__rate_interval]))) by (instance)", + "interval": "", + "legendFormat": "Handled ({{instance}})", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingress Controller Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + 
"schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Kubernetes Components (Nodes)", + "uid": "rancher-k8s-components-nodes-1", + "version": 5 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components.json new file mode 100644 index 0000000000..44cf97f9fd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/k8s/rancher-k8s-components.json @@ -0,0 +1,519 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 31, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", 
+ "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_total[$__rate_interval])) by (code)", + "interval": "", + "legendFormat": "{{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "API Server Request Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]({{instance}})" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"deployment\"}) by (name)", + "interval": "", + "legendFormat": "Deployment Depth", + "refId": "A" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", 
name=\"volumes\"}) by (name)", + "interval": "", + "legendFormat": "Volumes Depth", + "refId": "B" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"replicaset\"}) by (name)", + "interval": "", + "legendFormat": "Replicaset Depth", + "refId": "C" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"service\"}) by (name)", + "interval": "", + "legendFormat": "Service Depth", + "refId": "D" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"serviceaccount\"}) by (name)", + "interval": "", + "legendFormat": "ServiceAccount Depth", + "refId": "E" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"endpoint\"}) by (name)", + "interval": "", + "legendFormat": "Endpoint Depth", + "refId": "F" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"daemonset\"}) by (name)", + "interval": "", + "legendFormat": "DaemonSet Depth", + "refId": "G" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"statefulset\"}) by (name)", + "interval": "", + "legendFormat": "StatefulSet Depth", + "refId": "H" + }, + { + "expr": "sum(workqueue_depth{component=\"kube-controller-manager\", name=\"replicationmanager\"}) by (name)", + "interval": "", + "legendFormat": "ReplicationManager Depth", + "refId": "I" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Controller Manager Queue Depth", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { 
+ "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_status_scheduled{condition=\"false\"})", + "interval": "", + "legendFormat": "Failed To Schedule", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod Scheduling Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"reading\"})", + "interval": "", + "legendFormat": "Reading", + "refId": "A" + }, + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"waiting\"})", + "interval": "", + "legendFormat": "Waiting", + "refId": "B" + }, + { + "expr": "sum(nginx_ingress_controller_nginx_process_connections{state=\"writing\"})", + "interval": "", + "legendFormat": "Writing", + "refId": "C" + }, + { + "expr": "sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state=\"accepted\"}[$__rate_interval])))", + "interval": "", + "legendFormat": "Accepted", + "refId": "D" + }, + { + "expr": "sum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state=\"handled\"}[$__rate_interval])))", + "interval": "", + "legendFormat": "Handled", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingress Controller Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + 
"hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Kubernetes Components", + "uid": "rancher-k8s-components-1", + "version": 5 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node-detail.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node-detail.json new file mode 100644 index 0000000000..920fb94cf7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node-detail.json @@ -0,0 +1,805 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "links": [], + "panels": [ + { + "aliasColors": { + "{{mode}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\", instance=\"$instance\"}[$__rate_interval])) by (mode)", + "interval": "", + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load5{instance=~\"$instance\"} OR avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "Load[5m]", + "refId": "A" + }, + { + "expr": "sum(node_load1{instance=~\"$instance\"} OR 
avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[1m]))", + "interval": "", + "legendFormat": "Load[1m]", + "refId": "B" + }, + { + "expr": "sum(node_load15{instance=~\"$instance\"} OR avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[15m]))", + "interval": "", + "legendFormat": "Load[15m]", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (node_memory_MemAvailable_bytes{instance=~\"$instance\"} OR windows_os_physical_memory_free_bytes{instance=~\"$instance\"}) / (node_memory_MemTotal_bytes{instance=~\"$instance\"} OR windows_cs_physical_memory_bytes{instance=~\"$instance\"})", + "interval": "", + 
"legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{device}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\", instance=~\"$instance\"} OR windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\", instance=~\"$instance\"}) by (device) / sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\", instance=~\"$instance\"} OR windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\", instance=~\"$instance\"}) by (device))", + "interval": "", + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{device}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_logical_disk_read_bytes_total{instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Read ({{device}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_logical_disk_write_bytes_total{instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Write ({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + 
"title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{device}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Receive Errors ({{device}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', 
instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Receive Total ({{device}})", + "refId": "B" + }, + { + "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Transmit Errors ({{device}})", + "refId": "C" + }, + { + "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Receive Dropped ({{device}})", + "refId": "D" + }, + { + "expr": "sum(rate(node_network_transmit_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_outbound_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Transmit Dropped ({{device}})", + "refId": "E" + }, + { + "expr": "sum(rate(node_network_transmit_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Transmit Total ({{device}})", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{device}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Transmit Total ({{device}})", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "interval": "", + "legendFormat": "Receive Total 
({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "query": "label_values({__name__=~\"node_exporter_build_info|windows_exporter_build_info\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Node (Detail)", + "uid": "rancher-node-detail-1", + "version": 3 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node.json new file mode 100644 index 0000000000..367df3cc9d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/nodes/rancher-node.json @@ -0,0 
+1,792 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - avg(irate({__name__=~\"node_cpu_seconds_total|windows_cpu_time_total\", instance=\"$instance\", mode=\"idle\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Load[5m]" + }, + "properties": [] + } + ] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_load5{instance=~\"$instance\"} OR avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[5m]))", + "interval": "", + "legendFormat": "Load[5m]", + "refId": "A" + }, + { + "expr": "sum(node_load1{instance=~\"$instance\"} OR avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[1m]))", + "interval": "", + "legendFormat": "Load[1m]", + "refId": "B" + }, + { + "expr": "sum(node_load15{instance=~\"$instance\"} OR avg_over_time(windows_system_processor_queue_length{instance=~\"$instance\"}[15m]))", + "interval": "", + "legendFormat": "Load[15m]", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + 
{ + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(node_memory_MemAvailable_bytes{instance=~\"$instance\"} OR windows_os_physical_memory_free_bytes{instance=~\"$instance\"}) / sum(node_memory_MemTotal_bytes{instance=~\"$instance\"} OR windows_cs_physical_memory_bytes{instance=~\"$instance\"})", + "interval": "", + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + 
"hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - (sum(node_filesystem_free_bytes{device!~\"rootfs|HarddiskVolume.+\", instance=~\"$instance\"} OR windows_logical_disk_free_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\", instance=~\"$instance\"}) / sum(node_filesystem_size_bytes{device!~\"rootfs|HarddiskVolume.+\", instance=~\"$instance\"} OR windows_logical_disk_size_bytes{volume!~\"(HarddiskVolume.+|[A-Z]:.+)\", instance=~\"$instance\"}))", + "interval": "", + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_logical_disk_read_bytes_total{instance=~\"$instance\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Read", + "refId": "A" + }, + { + "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_logical_disk_write_bytes_total{instance=~\"$instance\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Write", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, 
+ "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Errors", + "refId": "A" + }, + { + "expr": "(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "B" + }, + { + "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Errors", + "refId": "C" + }, + { + "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Receive Dropped", + "refId": "D" + }, + { + "expr": "(sum(rate(node_network_transmit_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + 
(sum(rate(windows_net_packets_outbound_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Dropped", + "refId": "E" + }, + { + "expr": "(sum(rate(node_network_transmit_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ 
+ { + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_sent_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "A" + }, + { + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "query": "label_values({__name__=~\"node_exporter_build_info|windows_exporter_build_info\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + 
"tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Node", + "uid": "rancher-node-1", + "version": 3 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/performance/performance-debugging.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/performance/performance-debugging.json new file mode 100644 index 0000000000..454bc39390 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/performance/performance-debugging.json @@ -0,0 +1,1652 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": 
"$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (handler_name) (rate(lasso_controller_reconcile_time_seconds_sum[5m]))\n/\nsum by (handler_name) (rate(lasso_controller_reconcile_time_seconds_count[5m])))", + "interval": "", + "legendFormat": "{{handler_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Handler Average Execution Times Over Last 5 Minutes (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1390", + "format": "short", + "label": "Execution Time in Seconds", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1391", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (resource, method, code) (rate(steve_api_request_time_sum{resource!=\"subscribe\"}[5m]))\n/\nsum by (resource, method, code) (rate(steve_api_request_time_count{resource!=\"subscribe\"}[5m])))", + "interval": "", + "legendFormat": "{{resource}} {{method}} {{code}}", + "refId": 
"A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Rancher API Average Request Times Over Last 5 Minutes (Top 20) (Subscribes Omitted)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:178", + "format": "ms", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:179", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "rate(steve_api_request_time_sum{resource=\"subscribe\"}[5m])\n/\nrate(steve_api_request_time_count{resource=\"subscribe\"}[5m])", + "interval": "", + "legendFormat": "{{resource}} {{method}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Subscribe Average Request Times Over Last 5 Minutes", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:368", + "format": "ms", + "label": "", + "logBase": 1, + "show": true + 
}, + { + "$$hashKey": "object:369", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,workqueue_depth)", + "interval": "", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Lasso Controller Work Queue Depth (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1553", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1554", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 13, + "w": 16, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": 
true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (id, resource, method, code) (steve_api_total_requests))", + "instant": false, + "interval": "", + "legendFormat": "{{id}} {{resource}} {{method}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of Rancher Requests (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:290", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:291", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 16, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (id, resource, method) 
(steve_api_total_requests{code!=\"200\",code!=\"201\"}))", + "interval": "", + "legendFormat": "{{id}} {{resource}} {{method}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of Failed Rancher API Requests (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:428", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:429", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 54 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (resource, method, code) (rate(k8s_proxy_store_request_time_sum[5m]))\n/\nsum by (resource, method, code) (rate(k8s_proxy_store_request_time_count[5m])))", + "interval": "", + "legendFormat": "{{resource}} {{method}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "K8s Proxy Store Average Request Times Over Last 5 Minutes (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", 
+ "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:662", + "format": "ms", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:663", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 62 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (resource, method, code) (rate(k8s_proxy_client_request_time_sum[5m]))\n/\nsum by (resource, method, code) (rate(k8s_proxy_client_request_time_count[5m])))", + "interval": "", + "legendFormat": "{{resource}} {{method}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "K8s Proxy Client Average Request Times Over Last 5 Minutes (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1710", + "format": "ms", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1711", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 70 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,lasso_controller_total_cached_object)", + "interval": "", + "legendFormat": "{{kind}} {{version}} {{group}} {{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Cached Objects by GroupVersionKind (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:744", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:745", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 78 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, 
+ "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (handler_name) (\nlasso_controller_total_handler_execution\n))", + "interval": "", + "legendFormat": "{{handler_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Lasso Handler Executions (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:824", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:825", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 86 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20, sum by (handler_name,controller_name) (\nincrease(lasso_controller_total_handler_execution[2m])\n))", + "interval": "", + "legendFormat": "{{controller_name}}.{{handler_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + 
"title": "Handler Executions Over Last 2 Minutes (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 94 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (handler_name) (\nlasso_controller_total_handler_execution{has_error=\"true\"}\n))", + "interval": "", + "legendFormat": "{{handler_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Total Handler Executions with Error (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1230", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1231", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 102 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,sum by (handler_name,controller_name) (\nincrease(lasso_controller_total_handler_execution{has_error=\"true\"}[2m])\n))", + "interval": "", + "legendFormat": "{{controller_name}}.{{handler_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Handler Executions with Error Over Last 2 Minutes (Top 20)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 110 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + 
"pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "topk(20,session_server_total_transmit_bytes)", + "interval": "", + "legendFormat": "{{clientkey}} {{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Data Transmitted by Remote Dialer Sessions (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1953", + "format": "decbytes", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1954", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 118 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": true, + "expr": "session_server_total_transmit_error_bytes", + "interval": "", + "legendFormat": "{{clientkey}} {{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Errors for Remote Dialer Sessions (Top 20)", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2045", + "format": "ms", + "label": "Error Data", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2046", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 126 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": true, + "expr": "session_server_total_add_websocket_session - (session_server_total_remove_websocket_session or (0 * session_server_total_add_websocket_session))", + "interval": "", + "legendFormat": "{{clientkey}} {{pod}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Remote Dialer Active Connections (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2199", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2200", + "format": "short", + "logBase": 1, + "show": true + } + ], + 
"yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 134 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate(session_server_total_remove_connections[$__rate_interval])", + "interval": "", + "legendFormat": "{{clientkey}} {{pod}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Remote Dialer Removed Connections Rate (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2199", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2200", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 142 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate(session_server_total_add_connections[$__rate_interval])", + "interval": "", + "legendFormat": "{{clientkey}} {{pod}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Remote Dialer Added Connections Rate (Top 20)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2117", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2118", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Rancher Performance Debugging", + "uid": "tfrfU0a7k", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod-containers.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod-containers.json new file mode 100644 index 
0000000000..cf78a2204c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod-containers.json @@ -0,0 +1,636 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "iteration": 1618265214337, + "links": [], + "panels": [ + { + "aliasColors": { + "{{container}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "CFS throttled ({{container}})", + "refId": "A" + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container) OR sum(rate(windows_container_cpu_usage_seconds_kernelmode{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "System ({{container}})", + "refId": "B" + }, + { + "expr": 
"sum(rate(container_cpu_usage_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container) OR sum(rate(windows_container_cpu_usage_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Total ({{container}})", + "refId": "C" + }, + { + "expr": "sum(rate(container_cpu_user_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container) OR sum(rate(windows_container_cpu_usage_seconds_usermode{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "User ({{container}})", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "cpu", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{container}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": 
"null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"} OR windows_container_memory_usage_commit_bytes{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"}) by (container)", + "interval": "", + "legendFormat": "({{container}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{container}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_receive_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Receive Total ({{container}})", + "refId": "A" + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_transmit_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Transmit Total ({{container}})", + "refId": "B" + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Receive Dropped ({{container}})", + "refId": "C" + }, + { + "expr": "sum(irate(container_network_receive_errors_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Receive Errors ({{container}})", + "refId": "D" + }, + { + "expr": "sum(irate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Transmit Errors ({{container}})", + "refId": "E" + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Transmit Dropped ({{container}})", + "refId": "F" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{container}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Receive Total ({{container}})", + "refId": "A" + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by (container) OR sum(irate(windows_container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) by 
(container)", + "interval": "", + "legendFormat": "Transmit Total ({{container}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{container}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_fs_writes_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Write ({{container}})", + "refId": "A" + }, + { + "expr": "sum(rate(container_fs_reads_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) by (container)", + "interval": "", + "legendFormat": "Read ({{container}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "query": "label_values(kube_pod_info{}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "query": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Pod (Containers)", + "uid": "rancher-pod-containers-1", + "version": 8 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod.json 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod.json new file mode 100644 index 0000000000..4859eccc74 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/pods/rancher-pod.json @@ -0,0 +1,636 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "iteration": 1618265214337, + "links": [], + "panels": [ + { + "aliasColors": { + "": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "CFS throttled", + "refId": "A" + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) OR sum(rate(windows_container_cpu_usage_seconds_kernelmode{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "System", + "refId": "B" + }, + { + 
"expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) OR sum(rate(windows_container_cpu_usage_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Total", + "refId": "C" + }, + { + "expr": "sum(rate(container_cpu_user_seconds_total{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval])) OR sum(rate(windows_container_cpu_usage_seconds_usermode{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "User", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "cpu", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"} OR windows_container_memory_usage_commit_bytes{container!=\"POD\",namespace=~\"$namespace\",pod=~\"$pod\", container!=\"\"})", + "interval": "", + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR 
sum(irate(windows_container_network_receive_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "A" + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR sum(irate(windows_container_network_transmit_packets_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "B" + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR sum(irate(windows_container_network_receive_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Dropped", + "refId": "C" + }, + { + "expr": "sum(irate(container_network_receive_errors_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Errors", + "refId": "D" + }, + { + "expr": "sum(irate(container_network_transmit_errors_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Errors", + "refId": "E" + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR sum(irate(windows_container_network_transmit_packets_dropped_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Dropped", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR sum(irate(windows_container_network_receive_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Receive Total", + "refId": "A" + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval])) OR sum(irate(windows_container_network_transmit_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", 
+ "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_fs_writes_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "sum(rate(container_fs_reads_bytes_total{namespace=~\"$namespace\",pod=~\"$pod\",container!=\"\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "query": "label_values(kube_pod_info{}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "query": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Pod", + "uid": "rancher-pod-1", + "version": 8 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload-pods.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload-pods.json new file mode 100644 index 0000000000..92c0d24a6e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload-pods.json @@ -0,0 +1,652 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "iteration": 
1618265214337, + "links": [], + "panels": [ + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\",container=\"\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "CFS throttled ({{pod}})", + "refId": "A" + }, + { + "expr": "(sum(rate(container_cpu_system_seconds_total{namespace=~\"$namespace\",container=\"\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_kernelmode{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "System ({{pod}})", + "refId": "B" + }, + { + "expr": "(sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container=\"\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Total ({{pod}})", + "refId": "C" + }, 
+ { + "expr": "(sum(rate(container_cpu_user_seconds_total{namespace=~\"$namespace\",container=\"\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_usermode{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "User ({{pod}})", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "cpu", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(container_memory_working_set_bytes{namespace=~\"$namespace\",container=\"\"} OR windows_container_memory_usage_commit_bytes{namespace=~\"$namespace\"}) by 
(pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "({{pod}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Receive Total ({{pod}})", + 
"refId": "A" + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_transmit_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Transmit Total ({{pod}})", + "refId": "B" + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Receive Dropped ({{pod}})", + "refId": "C" + }, + { + "expr": "(sum(irate(container_network_receive_errors_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Receive Errors ({{pod}})", + "refId": "D" + }, + { + "expr": "(sum(irate(container_network_transmit_errors_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Transmit Errors ({{pod}})", + "refId": "E" + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Transmit Dropped ({{pod}})", + "refId": "F" + } + 
], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Receive Total ({{pod}})", + "refId": "A" + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR 
sum(irate(windows_container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Transmit Total ({{pod}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(rate(container_fs_writes_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Write ({{pod}})", + "refId": "A" + }, + { + 
"expr": "(sum(rate(container_fs_reads_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"}", + "interval": "", + "legendFormat": "Read ({{pod}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "query": "query_result(kube_pod_info{namespace!=\"\"} * on(pod) group_right(namespace, created_by_kind, created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*namespace=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "kind", + "query": "query_result(kube_pod_info{namespace=\"$namespace\", created_by_kind!=\"\"} * on(pod) group_right(namespace, created_by_kind, 
created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*created_by_kind=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "workload", + "query": "query_result(kube_pod_info{namespace=\"$namespace\", created_by_kind=\"$kind\", created_by_name!=\"\"} * on(pod) group_right(namespace, created_by_kind, created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*created_by_name=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Workload (Pods)", + "uid": "rancher-workload-pods-1", + "version": 8 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload.json b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload.json new file mode 100644 index 0000000000..9f5317c2f0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/rancher/workloads/rancher-workload.json @@ -0,0 +1,652 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 28, + "iteration": 1618265214337, + "links": [], + "panels": [ + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "CFS throttled", + "refId": "A" + }, + { + "expr": "sum((sum(rate(container_cpu_system_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_kernelmode{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "System", + "refId": "B" + }, + { + "expr": "sum((sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Total", + "refId": "C" + }, + { + "expr": "sum((sum(rate(container_cpu_user_seconds_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(rate(windows_container_cpu_usage_seconds_usermode{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) 
kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "User", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "cpu", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((sum(container_memory_working_set_bytes{namespace=~\"$namespace\"} OR windows_container_memory_usage_commit_bytes{namespace=~\"$namespace\"}) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + 
"title": "Memory Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Receive Total", + "refId": "A" + }, + { + "expr": "sum((sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_transmit_packets_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) 
kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "B" + }, + { + "expr": "sum((sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Receive Dropped", + "refId": "C" + }, + { + "expr": "sum((sum(irate(container_network_receive_errors_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Receive Errors", + "refId": "D" + }, + { + "expr": "sum((sum(irate(container_network_transmit_errors_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Transmit Errors", + "refId": "E" + }, + { + "expr": "sum((sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Transmit Dropped", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_receive_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Receive Total", + "refId": "A" + }, + { + "expr": "sum((sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod) OR sum(irate(windows_container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Transmit Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{{pod}}": "#3797d5" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum((sum(rate(container_fs_writes_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "sum((sum(rate(container_fs_reads_bytes_total{namespace=~\"$namespace\"}[$__rate_interval])) by (pod)) * on(pod) kube_pod_info{namespace=~\"$namespace\", created_by_kind=\"$kind\", created_by_name=\"$workload\"})", + "interval": "", + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + 
"timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 26, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "query": "query_result(kube_pod_info{namespace!=\"\"} * on(pod) group_right(namespace, created_by_kind, created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*namespace=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "kind", + "query": "query_result(kube_pod_info{namespace=\"$namespace\", created_by_kind!=\"\"} * on(pod) group_right(namespace, created_by_kind, created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*created_by_kind=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": 
"workload", + "query": "query_result(kube_pod_info{namespace=\"$namespace\", created_by_kind=\"$kind\", created_by_name!=\"\"} * on(pod) group_right(namespace, created_by_kind, created_by_name) count({__name__=~\"container_.*|windows_container_.*\", pod!=\"\"}) by (pod))", + "refresh": 2, + "regex": "/.*created_by_name=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Rancher / Workload", + "uid": "rancher-workload-1", + "version": 8 +} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/upgrade/scripts/delete-workloads-with-old-labels.sh b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/upgrade/scripts/delete-workloads-with-old-labels.sh new file mode 100644 index 0000000000..89431e7132 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/files/upgrade/scripts/delete-workloads-with-old-labels.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -e +set -x + +# node-exporter +kubectl delete daemonset -l app=prometheus-node-exporter,release=rancher-monitoring --ignore-not-found=true + +# prometheus-adapter +kubectl delete deployments -l app=prometheus-adapter,release=rancher-monitoring --ignore-not-found=true + +# kube-state-metrics +kubectl delete deployments -l app.kubernetes.io/instance=rancher-monitoring,app.kubernetes.io/name=kube-state-metrics --cascade=orphan --ignore-not-found=true +kubectl delete statefulsets -l app.kubernetes.io/instance=rancher-monitoring,app.kubernetes.io/name=kube-state-metrics --cascade=orphan --ignore-not-found=true diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/NOTES.txt b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/NOTES.txt new file mode 100644 index 0000000000..371f3ae398 --- /dev/null 
+++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/NOTES.txt @@ -0,0 +1,4 @@ +{{ $.Chart.Name }} has been installed. Check its status by running: + kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} get pods -l "release={{ $.Release.Name }}" + +Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator. diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl new file mode 100644 index 0000000000..d2207dd90d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/_helpers.tpl @@ -0,0 +1,459 @@ +# Rancher +{{- define "system_default_registry" -}} +{{- if .Values.global.cattle.systemDefaultRegistry -}} +{{- printf "%s/" .Values.global.cattle.systemDefaultRegistry -}} +{{- end -}} +{{- end -}} + +{{- define "monitoring_registry" -}} + {{- $temp_registry := (include "system_default_registry" .) -}} + {{- if $temp_registry -}} + {{- trimSuffix "/" $temp_registry -}} + {{- else -}} + {{- .Values.global.imageRegistry -}} + {{- end -}} +{{- end -}} + +{{/* +https://github.com/helm/helm/issues/4535#issuecomment-477778391 +Usage: {{ include "call-nested" (list . "SUBCHART_NAME" "TEMPLATE") }} +e.g. {{ include "call-nested" (list . "grafana" "grafana.fullname") }} +*/}} +{{- define "call-nested" }} +{{- $dot := index . 0 }} +{{- $subchart := index . 1 | splitList "." }} +{{- $template := index . 2 }} +{{- $values := $dot.Values }} +{{- range $subchart }} +{{- $values = index $values . 
}} +{{- end }} +{{- include $template (dict "Chart" (dict "Name" (last $subchart)) "Values" $values "Release" $dot.Release "Capabilities" $dot.Capabilities) }} +{{- end }} + +# Special Exporters +{{- define "exporter.kubeEtcd.enabled" -}} +{{- if or .Values.kubeEtcd.enabled .Values.rkeEtcd.enabled .Values.kubeAdmEtcd.enabled .Values.rke2Etcd.enabled -}} +"true" +{{- end -}} +{{- end }} + +{{- define "exporter.kubeControllerManager.enabled" -}} +{{- if or .Values.kubeControllerManager.enabled .Values.rkeControllerManager.enabled .Values.k3sServer.enabled .Values.kubeAdmControllerManager.enabled .Values.rke2ControllerManager.enabled -}} +"true" +{{- end -}} +{{- end }} + +{{- define "exporter.kubeScheduler.enabled" -}} +{{- if or .Values.kubeScheduler.enabled .Values.rkeScheduler.enabled .Values.k3sServer.enabled .Values.kubeAdmScheduler.enabled .Values.rke2Scheduler.enabled -}} +"true" +{{- end -}} +{{- end }} + +{{- define "exporter.kubeProxy.enabled" -}} +{{- if or .Values.kubeProxy.enabled .Values.rkeProxy.enabled .Values.k3sServer.enabled .Values.kubeAdmProxy.enabled .Values.rke2Proxy.enabled -}} +"true" +{{- end -}} +{{- end }} + +{{- define "exporter.kubelet.enabled" -}} +{{- if or .Values.kubelet.enabled .Values.hardenedKubelet.enabled .Values.k3sServer.enabled -}} +"true" +{{- end -}} +{{- end }} + +{{- define "exporter.kubeControllerManager.jobName" -}} +{{- if .Values.k3sServer.enabled -}} +k3s-server +{{- else -}} +kube-controller-manager +{{- end -}} +{{- end }} + +{{- define "exporter.kubeScheduler.jobName" -}} +{{- if .Values.k3sServer.enabled -}} +k3s-server +{{- else -}} +kube-scheduler +{{- end -}} +{{- end }} + +{{- define "exporter.kubeProxy.jobName" -}} +{{- if .Values.k3sServer.enabled -}} +k3s-server +{{- else -}} +kube-proxy +{{- end -}} +{{- end }} + +{{- define "exporter.kubelet.jobName" -}} +{{- if .Values.k3sServer.enabled -}} +k3s-server +{{- else -}} +kubelet +{{- end -}} +{{- end }} + +{{- define "kubelet.serviceMonitor.resourcePath" 
-}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if not (eq .Values.kubelet.serviceMonitor.resourcePath "/metrics/resource/v1alpha1") -}} +{{ .Values.kubelet.serviceMonitor.resourcePath }} +{{- else if semverCompare ">=1.20.0-0" $kubeTargetVersion -}} +/metrics/resource +{{- else -}} +/metrics/resource/v1alpha1 +{{- end -}} +{{- end }} + +{{- define "rancher.serviceMonitor.selector" -}} +{{- if .Values.rancherMonitoring.selector }} +{{ .Values.rancherMonitoring.selector | toYaml }} +{{- else }} +{{- $rancherDeployment := (lookup "apps/v1" "Deployment" "cattle-system" "rancher") }} +{{- if $rancherDeployment }} +matchLabels: + app: rancher + chart: {{ index $rancherDeployment.metadata.labels "chart" }} + release: rancher +{{- end }} +{{- end }} +{{- end }} + +# Windows Support + +{{/* +A Windows cluster adds a default taint to its Linux nodes; +add the Linux tolerations below so that workloads can be scheduled onto those Linux nodes. +*/}} + +{{- define "linux-node-tolerations" -}} +- key: "cattle.io/os" + value: "linux" + effect: "NoSchedule" + operator: "Equal" +{{- end -}} + +{{- define "linux-node-selector" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +beta.kubernetes.io/os: linux +{{- else -}} +kubernetes.io/os: linux +{{- end -}} +{{- end -}} + +# Prometheus Operator + +{{/* vim: set filetype=mustache: */}} +{{/* Expand the name of the chart. This is suffixed with -alertmanager, which means subtract 13 from longest 63 available */}} +{{- define "kube-prometheus-stack.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 50 | trimSuffix "-" -}} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+The components in this chart create additional resources that expand the longest created name strings. +The longest name that gets created adds an extra 37 characters, so truncation should be 63-37=26. +*/}} +{{- define "kube-prometheus-stack.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 26 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 26 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 26 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* Fullname suffixed with -operator */}} +{{/* Adding 9 to 26 truncation of kube-prometheus-stack.fullname */}} +{{- define "kube-prometheus-stack.operator.fullname" -}} +{{- if .Values.prometheusOperator.fullnameOverride -}} +{{- .Values.prometheusOperator.fullnameOverride | trunc 35 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-operator" (include "kube-prometheus-stack.fullname" .) -}} +{{- end }} +{{- end }} + +{{/* Prometheus custom resource instance name */}} +{{- define "kube-prometheus-stack.prometheus.crname" -}} +{{- if .Values.cleanPrometheusOperatorObjectNames }} +{{- include "kube-prometheus-stack.fullname" . }} +{{- else }} +{{- print (include "kube-prometheus-stack.fullname" .) "-prometheus" }} +{{- end }} +{{- end }} + +{{/* Prometheus apiVersion for networkpolicy */}} +{{- define "kube-prometheus-stack.prometheus.networkPolicy.apiVersion" -}} +{{- print "networking.k8s.io/v1" -}} +{{- end }} + +{{/* Alertmanager custom resource instance name */}} +{{- define "kube-prometheus-stack.alertmanager.crname" -}} +{{- if .Values.cleanPrometheusOperatorObjectNames }} +{{- include "kube-prometheus-stack.fullname" . }} +{{- else }} +{{- print (include "kube-prometheus-stack.fullname" .) 
"-alertmanager" -}} +{{- end }} +{{- end }} + +{{/* Fullname suffixed with thanos-ruler */}} +{{- define "kube-prometheus-stack.thanosRuler.fullname" -}} +{{- printf "%s-thanos-ruler" (include "kube-prometheus-stack.fullname" .) -}} +{{- end }} + +{{/* Shortened name suffixed with thanos-ruler */}} +{{- define "kube-prometheus-stack.thanosRuler.name" -}} +{{- default (printf "%s-thanos-ruler" (include "kube-prometheus-stack.name" .)) .Values.thanosRuler.name -}} +{{- end }} + + +{{/* Create chart name and version as used by the chart label. */}} +{{- define "kube-prometheus-stack.chartref" -}} +{{- replace "+" "_" .Chart.Version | printf "%s-%s" .Chart.Name -}} +{{- end }} + +{{/* Generate basic labels */}} +{{- define "kube-prometheus-stack.labels" }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: "{{ replace "+" "_" .Chart.Version }}" +app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }} +chart: {{ template "kube-prometheus-stack.chartref" . }} +release: {{ $.Release.Name | quote }} +heritage: {{ $.Release.Service | quote }} +{{- if .Values.commonLabels}} +{{ toYaml .Values.commonLabels }} +{{- end }} +{{- end }} + +{{/* Create the name of kube-prometheus-stack service account to use */}} +{{- define "kube-prometheus-stack.operator.serviceAccountName" -}} +{{- if .Values.prometheusOperator.serviceAccount.create -}} + {{ default (include "kube-prometheus-stack.operator.fullname" .) 
.Values.prometheusOperator.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheusOperator.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of the kube-prometheus-stack admission webhook service account to use */}} +{{- define "kube-prometheus-stack.operator.admissionWebhooks.serviceAccountName" -}} +{{- if .Values.prometheusOperator.serviceAccount.create -}} + {{ default (printf "%s-webhook" (include "kube-prometheus-stack.operator.fullname" .)) .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of prometheus service account to use */}} +{{- define "kube-prometheus-stack.prometheus.serviceAccountName" -}} +{{- if .Values.prometheus.serviceAccount.create -}} + {{ default (print (include "kube-prometheus-stack.fullname" .) "-prometheus") .Values.prometheus.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheus.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of alertmanager service account to use */}} +{{- define "kube-prometheus-stack.alertmanager.serviceAccountName" -}} +{{- if .Values.alertmanager.serviceAccount.create -}} + {{ default (print (include "kube-prometheus-stack.fullname" .) "-alertmanager") .Values.alertmanager.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.alertmanager.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of thanosRuler service account to use */}} +{{- define "kube-prometheus-stack.thanosRuler.serviceAccountName" -}} +{{- if .Values.thanosRuler.serviceAccount.create -}} + {{ default (include "kube-prometheus-stack.thanosRuler.name" .) 
.Values.thanosRuler.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.thanosRuler.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Use the grafana namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-grafana.namespace" -}} + {{- if .Values.grafana.namespaceOverride -}} + {{- .Values.grafana.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Allow kube-state-metrics job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-state-metrics.name" -}} + {{- if index .Values "kube-state-metrics" "nameOverride" -}} + {{- index .Values "kube-state-metrics" "nameOverride" -}} + {{- else -}} + {{- print "kube-state-metrics" -}} + {{- end -}} +{{- end -}} + +{{/* +Use the kube-state-metrics namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-kube-state-metrics.namespace" -}} + {{- if index .Values "kube-state-metrics" "namespaceOverride" -}} + {{- index .Values "kube-state-metrics" "namespaceOverride" -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Use the prometheus-node-exporter namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-prometheus-node-exporter.namespace" -}} + {{- if index .Values "prometheus-node-exporter" "namespaceOverride" -}} + {{- index .Values "prometheus-node-exporter" "namespaceOverride" -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* Allow KubeVersion to be overridden. 
*/}} +{{- define "kube-prometheus-stack.kubeVersion" -}} + {{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}} +{{- end -}} + +{{/* Get Ingress API Version */}} +{{- define "kube-prometheus-stack.ingress.apiVersion" -}} + {{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" (include "kube-prometheus-stack.kubeVersion" .)) -}} + {{- print "networking.k8s.io/v1" -}} + {{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}} + {{- print "networking.k8s.io/v1beta1" -}} + {{- else -}} + {{- print "extensions/v1beta1" -}} + {{- end -}} +{{- end -}} + +{{/* Check Ingress stability */}} +{{- define "kube-prometheus-stack.ingress.isStable" -}} + {{- eq (include "kube-prometheus-stack.ingress.apiVersion" .) "networking.k8s.io/v1" -}} +{{- end -}} + +{{/* Check Ingress supports pathType */}} +{{/* pathType was added to networking.k8s.io/v1beta1 in Kubernetes 1.18 */}} +{{- define "kube-prometheus-stack.ingress.supportsPathType" -}} + {{- or (eq (include "kube-prometheus-stack.ingress.isStable" .) "true") (and (eq (include "kube-prometheus-stack.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" (include "kube-prometheus-stack.kubeVersion" .))) -}} +{{- end -}} + +{{/* Get Policy API Version */}} +{{- define "kube-prometheus-stack.pdb.apiVersion" -}} + {{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.21-0" (include "kube-prometheus-stack.kubeVersion" .)) -}} + {{- print "policy/v1" -}} + {{- else -}} + {{- print "policy/v1beta1" -}} + {{- end -}} + {{- end -}} + +{{/* Get value based on current Kubernetes version */}} +{{- define "kube-prometheus-stack.kubeVersionDefaultValue" -}} + {{- $values := index . 0 -}} + {{- $kubeVersion := index . 1 -}} + {{- $old := index . 2 -}} + {{- $new := index . 3 -}} + {{- $default := index . 
4 -}} + {{- if kindIs "invalid" $default -}} + {{- if semverCompare $kubeVersion (include "kube-prometheus-stack.kubeVersion" $values) -}} + {{- print $new -}} + {{- else -}} + {{- print $old -}} + {{- end -}} + {{- else -}} + {{- print $default }} + {{- end -}} +{{- end -}} + +{{/* Get value for kube-controller-manager depending on insecure scraping availability */}} +{{- define "kube-prometheus-stack.kubeControllerManager.insecureScrape" -}} + {{- $values := index . 0 -}} + {{- $insecure := index . 1 -}} + {{- $secure := index . 2 -}} + {{- $userValue := index . 3 -}} + {{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.22-0" $insecure $secure $userValue) -}} +{{- end -}} + +{{/* Get value for kube-scheduler depending on insecure scraping availability */}} +{{- define "kube-prometheus-stack.kubeScheduler.insecureScrape" -}} + {{- $values := index . 0 -}} + {{- $insecure := index . 1 -}} + {{- $secure := index . 2 -}} + {{- $userValue := index . 3 -}} + {{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.23-0" $insecure $secure $userValue) -}} +{{- end -}} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end -}} + +{{/* +To help compatibility with other charts which use global.imagePullSecrets. +Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). 
+global: + imagePullSecrets: + - name: pullSecret1 + - name: pullSecret2 + +or + +global: + imagePullSecrets: + - pullSecret1 + - pullSecret2 +*/}} +{{- define "kube-prometheus-stack.imagePullSecrets" -}} +{{- range .Values.global.imagePullSecrets }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{- define "kube-prometheus-stack.operator.admission-webhook.dnsNames" }} +{{- $fullname := include "kube-prometheus-stack.operator.fullname" . }} +{{- $namespace := include "kube-prometheus-stack.namespace" . }} +{{- $fullname }} +{{ $fullname }}.{{ $namespace }}.svc +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +{{ $fullname }}-webhook +{{ $fullname }}-webhook.{{ $namespace }}.svc +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/alertmanager.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/alertmanager.yaml new file mode 100644 index 0000000000..19044054ac --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/alertmanager.yaml @@ -0,0 +1,191 @@ +{{- if .Values.alertmanager.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + name: {{ template "kube-prometheus-stack.alertmanager.crname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.alertmanager.annotations }} + annotations: +{{ toYaml .Values.alertmanager.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.alertmanager.alertmanagerSpec.image }} + {{- $registry := include "monitoring_registry" . 
| default .Values.alertmanager.alertmanagerSpec.image.registry }} + {{- if and .Values.alertmanager.alertmanagerSpec.image.tag .Values.alertmanager.alertmanagerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}" + {{- else if .Values.alertmanager.alertmanagerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}" + {{- else if .Values.alertmanager.alertmanagerSpec.image.tag }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}" + {{- else }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}" + {{- end }} + version: {{ .Values.alertmanager.alertmanagerSpec.image.tag }} + {{- if .Values.alertmanager.alertmanagerSpec.image.sha }} + sha: {{ .Values.alertmanager.alertmanagerSpec.image.sha }} + {{- end }} +{{- end }} + replicas: {{ .Values.alertmanager.alertmanagerSpec.replicas }} + listenLocal: {{ .Values.alertmanager.alertmanagerSpec.listenLocal }} + serviceAccountName: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.alertmanager.alertmanagerSpec.automountServiceAccountToken }} +{{- if .Values.alertmanager.alertmanagerSpec.externalUrl }} + externalUrl: "{{ tpl .Values.alertmanager.alertmanagerSpec.externalUrl . }}" +{{- else if and .Values.alertmanager.ingress.enabled .Values.alertmanager.ingress.hosts }} + externalUrl: "http://{{ tpl (index .Values.alertmanager.ingress.hosts 0) . 
}}{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" +{{- else if not (or (kindIs "invalid" .Values.global.cattle.url) (kindIs "invalid" .Values.global.cattle.clusterId)) }}{{- /* Rancher API-proxy URL: resolve the namespace through the kube-prometheus-stack.namespace helper (namespaceOverride, else the release namespace) so the path is never rendered with an empty namespace segment. */}} + externalUrl: "{{ .Values.global.cattle.url }}/k8s/clusters/{{ .Values.global.cattle.clusterId }}/api/v1/namespaces/{{ template "kube-prometheus-stack.namespace" . }}/services/http:{{ template "kube-prometheus-stack.fullname" . }}-alertmanager:{{ .Values.alertmanager.service.port }}/proxy" +{{- else }} + externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-alertmanager.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.alertmanager.service.port }} +{{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 4 }} +{{- if .Values.alertmanager.alertmanagerSpec.nodeSelector }} +{{ toYaml .Values.alertmanager.alertmanagerSpec.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.alertmanager.alertmanagerSpec.paused }} + logFormat: {{ .Values.alertmanager.alertmanagerSpec.logFormat | quote }} + logLevel: {{ .Values.alertmanager.alertmanagerSpec.logLevel | quote }} + retention: {{ .Values.alertmanager.alertmanagerSpec.retention | quote }} +{{- if .Values.alertmanager.alertmanagerSpec.secrets }} + secrets: +{{ toYaml .Values.alertmanager.alertmanagerSpec.secrets | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.configSecret }} + configSecret: {{ .Values.alertmanager.alertmanagerSpec.configSecret }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.configMaps }} + configMaps: +{{ toYaml .Values.alertmanager.alertmanagerSpec.configMaps | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector }} + alertmanagerConfigSelector: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector | indent 4) . 
}} +{{ else }} + alertmanagerConfigSelector: {} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigNamespaceSelector }} + alertmanagerConfigNamespaceSelector: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigNamespaceSelector | indent 4) . }} +{{ else }} + alertmanagerConfigNamespaceSelector: {} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.web }} + web: +{{ toYaml .Values.alertmanager.alertmanagerSpec.web | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfiguration }} + alertmanagerConfiguration: +{{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfiguration | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigMatcherStrategy }} + alertmanagerConfigMatcherStrategy: +{{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigMatcherStrategy | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.resources }} + resources: +{{ toYaml .Values.alertmanager.alertmanagerSpec.resources | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.routePrefix }} + routePrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.securityContext }} + securityContext: +{{ toYaml .Values.alertmanager.alertmanagerSpec.securityContext | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.storage }} + storage: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.storage | indent 4) . 
}} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.podMetadata }} + podMetadata: +{{ toYaml .Values.alertmanager.alertmanagerSpec.podMetadata | indent 4 }} +{{- end }} +{{- if or .Values.alertmanager.alertmanagerSpec.podAntiAffinity .Values.alertmanager.alertmanagerSpec.affinity }} + affinity: +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.affinity }} +{{ toYaml .Values.alertmanager.alertmanagerSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [alertmanager]} + - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.alertmanager.crname" . }}]} +{{- else if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [alertmanager]} + - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.alertmanager.crname" . }}]} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 4 }} +{{- if .Values.alertmanager.alertmanagerSpec.tolerations }} +{{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.containers }} + containers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.containers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.initContainers }} + initContainers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.priorityClassName }} + priorityClassName: {{.Values.alertmanager.alertmanagerSpec.priorityClassName }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.additionalPeers }} + additionalPeers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.additionalPeers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.volumes }} + volumes: +{{ toYaml .Values.alertmanager.alertmanagerSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.alertmanager.alertmanagerSpec.volumeMounts | indent 4 }} +{{- end }} + portName: {{ .Values.alertmanager.alertmanagerSpec.portName }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }} + clusterAdvertiseAddress: {{ .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterGossipInterval }} + clusterGossipInterval: {{ .Values.alertmanager.alertmanagerSpec.clusterGossipInterval }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterPeerTimeout }} + clusterPeerTimeout: {{ .Values.alertmanager.alertmanagerSpec.clusterPeerTimeout }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterPushpullInterval }} + clusterPushpullInterval: {{ .Values.alertmanager.alertmanagerSpec.clusterPushpullInterval }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }} + forceEnableClusterMode: {{ .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }} +{{- end }} +{{- if 
.Values.alertmanager.alertmanagerSpec.minReadySeconds }} + minReadySeconds: {{ .Values.alertmanager.alertmanagerSpec.minReadySeconds }} +{{- end }} +{{- with .Values.alertmanager.alertmanagerSpec.additionalConfig }} + {{- tpl (toYaml .) $ | nindent 2 }} +{{- end }} +{{- with .Values.alertmanager.alertmanagerSpec.additionalConfigString }} + {{- tpl . $ | nindent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/extrasecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/extrasecret.yaml new file mode 100644 index 0000000000..ecd8f47021 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.alertmanager.extraSecret.data -}} +{{- $secretName := printf "alertmanager-%s-extra" (include "kube-prometheus-stack.fullname" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.alertmanager.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.alertmanager.extraSecret.annotations }} + annotations: +{{ toYaml .Values.alertmanager.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/component: alertmanager +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +data: +{{- range $key, $val := .Values.alertmanager.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingress.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingress.yaml new file mode 100644 index 0000000000..be9f5aa279 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingress.yaml @@ -0,0 +1,78 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.ingress.enabled }} +{{- $pathType := .Values.alertmanager.ingress.pathType | default "ImplementationSpecific" }} +{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} +{{- $backendServiceName := .Values.alertmanager.ingress.serviceName | default (printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager") }} +{{- $servicePort := .Values.alertmanager.ingress.servicePort | default .Values.alertmanager.service.port -}} +{{- $routePrefix := list .Values.alertmanager.alertmanagerSpec.routePrefix }} +{{- $paths := .Values.alertmanager.ingress.paths | default $routePrefix -}} +{{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) "true" -}} +apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.alertmanager.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.alertmanager.ingress.annotations) . | nindent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{- if .Values.alertmanager.ingress.labels }} +{{ toYaml .Values.alertmanager.ingress.labels | indent 4 }} +{{- end }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +spec: + {{- if $apiIsStable }} + {{- if .Values.alertmanager.ingress.ingressClassName }} + ingressClassName: {{ .Values.alertmanager.ingress.ingressClassName }} + {{- end }} + {{- end }} + rules: + {{- if .Values.alertmanager.ingress.hosts }} + {{- range $host := .Values.alertmanager.ingress.hosts }} + - host: {{ tpl $host $ | quote }} + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $backendServiceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $backendServiceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $backendServiceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $backendServiceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- if .Values.alertmanager.ingress.tls }} + tls: +{{ tpl (toYaml .Values.alertmanager.ingress.tls | indent 4) . 
}} + {{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingressperreplica.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingressperreplica.yaml new file mode 100644 index 0000000000..b2e00a4162 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/ingressperreplica.yaml @@ -0,0 +1,67 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.servicePerReplica.enabled .Values.alertmanager.ingressPerReplica.enabled }} +{{- $pathType := .Values.alertmanager.ingressPerReplica.pathType | default "" }} +{{- $count := .Values.alertmanager.alertmanagerSpec.replicas | int -}} +{{- $servicePort := .Values.alertmanager.service.port -}} +{{- $ingressValues := .Values.alertmanager.ingressPerReplica -}} +{{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) "true" -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-ingressperreplica + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +items: +{{ range $i, $e := until $count }} + - kind: Ingress + apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" $ }} + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-alertmanager + {{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $ingressValues.labels }} +{{ toYaml $ingressValues.labels | indent 8 }} + {{- end }} + {{- if $ingressValues.annotations }} + annotations: + {{- tpl (toYaml $ingressValues.annotations) $ | nindent 8 }} + {{- end }} + spec: + {{- if $apiIsStable }} + {{- if $ingressValues.ingressClassName }} + ingressClassName: {{ $ingressValues.ingressClassName }} + {{- end }} + {{- end }} + rules: + - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + http: + paths: + {{- range $p := $ingressValues.paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }} + tls: + - hosts: + - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + {{- if $ingressValues.tlsSecretPerReplica.enabled }} + secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }} + {{- else }} + secretName: {{ $ingressValues.tlsSecretName }} + {{- end }} + {{- end }} +{{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/podDisruptionBudget.yaml 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/podDisruptionBudget.yaml new file mode 100644 index 0000000000..b183403125 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/podDisruptionBudget.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.podDisruptionBudget.enabled }} +apiVersion: {{ include "kube-prometheus-stack.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if .Values.alertmanager.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.alertmanager.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.alertmanager.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.alertmanager.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-role.yaml new file mode 100644 index 0000000000..8810e93ded --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-role.yaml @@ -0,0 +1,23 @@ +{{- if and .Values.alertmanager.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-alertmanager + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +rules: +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} +- apiGroups: ['policy'] +{{- else }} +- apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-prometheus-stack.fullname" . }}-alertmanager +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-rolebinding.yaml new file mode 100644 index 0000000000..794f4ad178 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp-rolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.alertmanager.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp.yaml new file mode 100644 index 0000000000..07b616b5cb --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/psp.yaml @@ -0,0 +1,47 @@ +{{- if and .Values.alertmanager.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{- if .Values.global.rbac.pspAnnotations }} + annotations: +{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }} +{{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. 
+ - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/secret.yaml new file mode 100644 index 0000000000..d2fe84a7bf --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/secret.yaml @@ -0,0 +1,35 @@ +{{- if and (.Values.alertmanager.enabled) (not .Values.alertmanager.alertmanagerSpec.useExistingSecret) }} +{{/* This file is applied when the operation is helm install and the target secret does not exist. */}} +{{- $secretName := (printf "alertmanager-%s" (include "kube-prometheus-stack.alertmanager.crname" .)) }} +{{- if or (not (lookup "v1" "Secret" (include "kube-prometheus-stack.namespace" .) $secretName)) (eq .Values.alertmanager.secret.recreateIfExists true) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install, pre-upgrade + "helm.sh/hook-weight": "3" + "helm.sh/resource-policy": keep +{{- if .Values.alertmanager.secret.annotations }} +{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- if .Values.alertmanager.tplConfig }} +{{- if .Values.alertmanager.stringConfig }} + alertmanager.yaml: {{ tpl (.Values.alertmanager.stringConfig) . | b64enc | quote }} +{{- else if eq (typeOf .Values.alertmanager.config) "string" }} + alertmanager.yaml: {{ tpl (.Values.alertmanager.config) . | b64enc | quote }} +{{- else }} + alertmanager.yaml: {{ tpl (toYaml .Values.alertmanager.config) . 
| b64enc | quote }} +{{- end }} +{{- else }} + alertmanager.yaml: {{ toYaml .Values.alertmanager.config | b64enc | quote }} +{{- end }} +{{- range $key, $val := .Values.alertmanager.templateFiles }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/service.yaml new file mode 100644 index 0000000000..373de328a5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/service.yaml @@ -0,0 +1,68 @@ +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if .Values.alertmanager.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + self-monitor: {{ .Values.alertmanager.serviceMonitor.selfMonitor | quote }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.alertmanager.service.labels }} +{{ toYaml .Values.alertmanager.service.labels | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.service.annotations }} + annotations: +{{ toYaml .Values.alertmanager.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.alertmanager.service.clusterIP }} + clusterIP: {{ .Values.alertmanager.service.clusterIP }} +{{- end }} +{{- if .Values.alertmanager.service.externalIPs }} + externalIPs: +{{ toYaml .Values.alertmanager.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.alertmanager.service.loadBalancerIP }} +{{- end }} +{{- if .Values.alertmanager.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.alertmanager.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.alertmanager.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.alertmanager.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.alertmanager.alertmanagerSpec.portName }} + {{- if eq .Values.alertmanager.service.type "NodePort" }} + nodePort: {{ .Values.alertmanager.service.nodePort }} + {{- end }} + port: {{ .Values.alertmanager.service.port }} + targetPort: {{ .Values.alertmanager.service.targetPort }} + protocol: TCP + - name: reloader-web + {{- if semverCompare ">=1.20.0-0" $kubeTargetVersion }} + appProtocol: http + {{- end }} + port: 8080 + targetPort: reloader-web +{{- if .Values.alertmanager.service.additionalPorts }} +{{ toYaml .Values.alertmanager.service.additionalPorts | indent 2 }} +{{- end }} + selector: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" . 
}} +{{- if .Values.alertmanager.service.sessionAffinity }} + sessionAffinity: {{ .Values.alertmanager.service.sessionAffinity }} +{{- end }} +{{- if eq .Values.alertmanager.service.sessionAffinity "ClientIP" }} + sessionAffinityConfig: + clientIP: + timeoutSeconds: {{ .Values.alertmanager.service.sessionAffinityConfig.clientIP.timeoutSeconds }} +{{- end }} + type: "{{ .Values.alertmanager.service.type }}" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceaccount.yaml new file mode 100644 index 0000000000..745ced8bde --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceaccount.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/component: alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.alertmanager.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.alertmanager.serviceAccount.annotations | indent 4 }} +{{- end }} +automountServiceAccountToken: {{ .Values.alertmanager.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 2}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/servicemonitor.yaml new file mode 100644 index 0000000000..6233690019 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/servicemonitor.yaml @@ -0,0 +1,84 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.alertmanager.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.alertmanager.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + release: {{ $.Release.Name | quote }} + self-monitor: "true" + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) 
| quote }} + endpoints: + - port: {{ .Values.alertmanager.alertmanagerSpec.portName }} + enableHttp2: {{ .Values.alertmanager.serviceMonitor.enableHttp2 }} + {{- if .Values.alertmanager.serviceMonitor.interval }} + interval: {{ .Values.alertmanager.serviceMonitor.interval }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.scheme }} + scheme: {{ .Values.alertmanager.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.alertmanager.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "{{ trimSuffix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }}/metrics" + metricRelabelings: + {{- if .Values.alertmanager.serviceMonitor.metricRelabelings }} + {{- tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | nindent 6) . 
}} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName }} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.alertmanager.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.alertmanager.serviceMonitor.additionalEndpoints }} + - port: {{ .port }} + {{- if or $.Values.alertmanager.serviceMonitor.interval .interval }} + interval: {{ default $.Values.alertmanager.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.alertmanager.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- if or $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings }} + metricRelabelings: {{- tpl (default $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) . 
}} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.alertmanager.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceperreplica.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceperreplica.yaml new file mode 100644 index 0000000000..75a13bdf97 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/alertmanager/serviceperreplica.yaml @@ -0,0 +1,49 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.servicePerReplica.enabled }} +{{- $count := .Values.alertmanager.alertmanagerSpec.replicas | int -}} +{{- $serviceValues := .Values.alertmanager.servicePerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-serviceperreplica + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +items: +{{- range $i, $e := until $count }} + - apiVersion: v1 + kind: Service + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-alertmanager +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $serviceValues.annotations }} + annotations: +{{ toYaml $serviceValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $serviceValues.clusterIP }} + clusterIP: {{ $serviceValues.clusterIP }} + {{- end }} + {{- if $serviceValues.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := $serviceValues.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + {{- if ne $serviceValues.type "ClusterIP" }} + externalTrafficPolicy: {{ $serviceValues.externalTrafficPolicy }} + {{- end }} + ports: + - name: {{ $.Values.alertmanager.alertmanagerSpec.portName }} + {{- if eq $serviceValues.type "NodePort" }} + nodePort: {{ $serviceValues.nodePort }} + {{- end }} + port: {{ $serviceValues.port }} + targetPort: {{ $serviceValues.targetPort }} + selector: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" $ }} + statefulset.kubernetes.io/pod-name: alertmanager-{{ include "kube-prometheus-stack.alertmanager.crname" $ }}-{{ $i }} + type: "{{ $serviceValues.type }}" +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/service.yaml new file mode 100644 index 0000000000..b8618f7558 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/service.yaml @@ -0,0 +1,24 @@ +{{- if and .Values.coreDns.enabled .Values.coreDns.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + 
name: {{ template "kube-prometheus-stack.fullname" . }}-coredns + labels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + jobLabel: coredns +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: {{ .Values.coreDns.serviceMonitor.port }} + port: {{ .Values.coreDns.service.port }} + protocol: TCP + targetPort: {{ .Values.coreDns.service.targetPort }} + selector: + {{- if .Values.coreDns.service.selector }} +{{ toYaml .Values.coreDns.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-dns + {{- end}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/servicemonitor.yaml new file mode 100644 index 0000000000..dc15a06937 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/core-dns/servicemonitor.yaml @@ -0,0 +1,58 @@ +{{- if and .Values.coreDns.enabled .Values.coreDns.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-coredns + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + {{- with .Values.coreDns.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.coreDns.serviceMonitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.coreDns.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.coreDns.serviceMonitor.selector }} + {{ tpl (toYaml .Values.coreDns.serviceMonitor.selector | nindent 4) . 
}} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.coreDns.serviceMonitor.port }} + {{- if .Values.coreDns.serviceMonitor.interval}} + interval: {{ .Values.coreDns.serviceMonitor.interval }} + {{- end }} + {{- if .Values.coreDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.coreDns.serviceMonitor.proxyUrl}} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + metricRelabelings: + {{- if .Values.coreDns.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.coreDns.serviceMonitor.metricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName }} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.coreDns.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.coreDns.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-api-server/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-api-server/servicemonitor.yaml new file mode 100644 index 0000000000..66e777632e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-api-server/servicemonitor.yaml @@ -0,0 +1,57 @@ +{{- if and .Values.kubeApiServer.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-apiserver + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: default + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-apiserver + {{- with .Values.kubeApiServer.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.kubeApiServer.serviceMonitor | nindent 2 }} + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeApiServer.serviceMonitor.interval }} + interval: {{ .Values.kubeApiServer.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeApiServer.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeApiServer.serviceMonitor.proxyUrl }} + {{- end }} + port: https + scheme: https + metricRelabelings: + {{- if .Values.kubeApiServer.serviceMonitor.metricRelabelings }} +{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.metricRelabelings | indent 6) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeApiServer.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.relabelings | indent 6) . 
}} +{{- end }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: {{ .Values.kubeApiServer.tlsConfig.serverName }} + insecureSkipVerify: {{ .Values.kubeApiServer.tlsConfig.insecureSkipVerify }} + jobLabel: {{ .Values.kubeApiServer.serviceMonitor.jobLabel }} + namespaceSelector: + matchNames: + - default + selector: +{{ toYaml .Values.kubeApiServer.serviceMonitor.selector | indent 4 }} +{{- end}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/endpoints.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/endpoints.yaml new file mode 100644 index 0000000000..6a6afa6412 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/endpoints.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + k8s-app: kube-controller-manager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeControllerManager.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- $kubeControllerManagerDefaultInsecurePort := 10252 }} + {{- $kubeControllerManagerDefaultSecurePort := 10257 }} + port: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . 
$kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.port) }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/service.yaml new file mode 100644 index 0000000000..43b1a976d5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/service.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + jobLabel: kube-controller-manager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- $kubeControllerManagerDefaultInsecurePort := 10252 }} + {{- $kubeControllerManagerDefaultSecurePort := 10257 }} + port: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.port) }} + protocol: TCP + targetPort: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . 
$kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.targetPort) }} +{{- if .Values.kubeControllerManager.endpoints }}{{- else }} + selector: + {{- if .Values.kubeControllerManager.service.selector }} +{{ toYaml .Values.kubeControllerManager.service.selector | indent 4 }} + {{- else}} + component: kube-controller-manager + {{- end}} +{{- end }} + type: ClusterIP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/servicemonitor.yaml new file mode 100644 index 0000000000..7ed3baa65f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-controller-manager/servicemonitor.yaml @@ -0,0 +1,69 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + {{- with .Values.kubeControllerManager.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeControllerManager.serviceMonitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeControllerManager.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeControllerManager.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.selector | nindent 4) . 
}} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- if .Values.kubeControllerManager.serviceMonitor.interval }} + interval: {{ .Values.kubeControllerManager.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeControllerManager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeControllerManager.serviceMonitor.proxyUrl }} + {{- end }} + {{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . false true .Values.kubeControllerManager.serviceMonitor.https)) "true" }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . nil true .Values.kubeControllerManager.serviceMonitor.insecureSkipVerify)) "true" }} + insecureSkipVerify: true + {{- end }} + {{- if .Values.kubeControllerManager.serviceMonitor.serverName }} + serverName: {{ .Values.kubeControllerManager.serviceMonitor.serverName }} + {{- end }} + {{- end }} + metricRelabelings:{{/* key is emitted unconditionally; each group of entries below is optional */}} + {{- if .Values.kubeControllerManager.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.metricRelabelings | indent 4) . 
}} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeControllerManager.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/service.yaml new file mode 100644 index 0000000000..81b2c9930c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/service.yaml @@ -0,0 +1,28 @@ +{{- if and .Values.kubeDns.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-dns + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-dns + jobLabel: kube-dns +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics-dnsmasq + port: {{ .Values.kubeDns.service.dnsmasq.port }} + protocol: TCP + targetPort: {{ .Values.kubeDns.service.dnsmasq.targetPort }} + - name: http-metrics-skydns + port: {{ .Values.kubeDns.service.skydns.port }} + protocol: TCP + targetPort: {{ .Values.kubeDns.service.skydns.targetPort }} + selector: + {{- if .Values.kubeDns.service.selector }} +{{ toYaml .Values.kubeDns.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-dns + {{- end}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/servicemonitor.yaml new file mode 100644 index 0000000000..9fa41b575f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-dns/servicemonitor.yaml @@ -0,0 +1,71 @@ +{{- if and .Values.kubeDns.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-dns + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-dns + {{- with .Values.kubeDns.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeDns.serviceMonitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeDns.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeDns.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeDns.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-kube-dns + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics-dnsmasq + {{- if .Values.kubeDns.serviceMonitor.interval }} + interval: {{ .Values.kubeDns.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeDns.serviceMonitor.proxyUrl}} + {{- end }} + metricRelabelings: + {{- if .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings }} + {{ tpl (toYaml .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeDns.serviceMonitor.dnsmasqRelabelings }} + relabelings: +{{ toYaml .Values.kubeDns.serviceMonitor.dnsmasqRelabelings | indent 4 }} +{{- end }} + - port: http-metrics-skydns + {{- if .Values.kubeDns.serviceMonitor.interval }} + interval: {{ .Values.kubeDns.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token +{{- if .Values.kubeDns.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeDns.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeDns.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeDns.serviceMonitor.relabelings | indent 4) . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/endpoints.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/endpoints.yaml new file mode 100644 index 0000000000..e366447577 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/endpoints.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + k8s-app: etcd-server +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeEtcd.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeEtcd.serviceMonitor.port }} + port: {{ .Values.kubeEtcd.service.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/service.yaml new file mode 100644 index 0000000000..d07d4f35e3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/service.yaml @@ -0,0 +1,27 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + jobLabel: kube-etcd +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: {{ .Values.kubeEtcd.serviceMonitor.port }} + port: {{ .Values.kubeEtcd.service.port }} + protocol: TCP + targetPort: {{ .Values.kubeEtcd.service.targetPort }} +{{- if .Values.kubeEtcd.endpoints }}{{- else }} + selector: + {{- if .Values.kubeEtcd.service.selector }} +{{ toYaml .Values.kubeEtcd.service.selector | indent 4 }} + {{- else}} + component: etcd + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/servicemonitor.yaml new file mode 100644 index 0000000000..26fdbdbed3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-etcd/servicemonitor.yaml @@ -0,0 +1,75 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + {{- with .Values.kubeEtcd.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeEtcd.serviceMonitor.jobLabel }} + {{- /* scrapeLimits is piped through nindent 2 here, matching the kube-proxy, kube-scheduler and kubelet ServiceMonitors, so its output lines up with the other spec-level keys */}}{{- include "servicemonitor.scrapeLimits" .Values.kubeEtcd.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeEtcd.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-kube-etcd + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeEtcd.serviceMonitor.port }} + {{- if .Values.kubeEtcd.serviceMonitor.interval }} + interval: {{ .Values.kubeEtcd.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeEtcd.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeEtcd.serviceMonitor.proxyUrl}} + {{- end }} + {{- if eq .Values.kubeEtcd.serviceMonitor.scheme "https" }} + scheme: https + tlsConfig: + {{- if .Values.kubeEtcd.serviceMonitor.serverName }} + serverName: {{ .Values.kubeEtcd.serviceMonitor.serverName }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.caFile }} + caFile: {{ .Values.kubeEtcd.serviceMonitor.caFile }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.certFile }} + certFile: {{ .Values.kubeEtcd.serviceMonitor.certFile }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.keyFile }} + keyFile: {{ .Values.kubeEtcd.serviceMonitor.keyFile }} + {{- end}} + insecureSkipVerify: {{ .Values.kubeEtcd.serviceMonitor.insecureSkipVerify }} + {{- end }} + metricRelabelings: + {{- if .Values.kubeEtcd.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.metricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeEtcd.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.relabelings | indent 4) . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/endpoints.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/endpoints.yaml new file mode 100644 index 0000000000..8613e62425 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/endpoints.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + k8s-app: kube-proxy +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeProxy.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeProxy.serviceMonitor.port }} + port: {{ .Values.kubeProxy.service.port }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/service.yaml new file mode 100644 index 0000000000..8ccb2210d7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/service.yaml @@ -0,0 +1,27 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + jobLabel: kube-proxy +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: {{ .Values.kubeProxy.serviceMonitor.port }} + port: {{ .Values.kubeProxy.service.port }} + protocol: TCP + targetPort: {{ .Values.kubeProxy.service.targetPort }} +{{- if .Values.kubeProxy.endpoints }}{{- else }} + selector: + {{- if .Values.kubeProxy.service.selector }} +{{ toYaml .Values.kubeProxy.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-proxy + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/servicemonitor.yaml new file mode 100644 index 0000000000..24b0ab2001 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-proxy/servicemonitor.yaml @@ -0,0 +1,63 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + {{- with .Values.kubeProxy.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeProxy.serviceMonitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeProxy.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeProxy.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeProxy.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-kube-proxy + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeProxy.serviceMonitor.port }} + {{- if .Values.kubeProxy.serviceMonitor.interval }} + interval: {{ .Values.kubeProxy.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeProxy.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeProxy.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.kubeProxy.serviceMonitor.https }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- end}} + metricRelabelings: + {{- if .Values.kubeProxy.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.kubeProxy.serviceMonitor.metricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeProxy.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeProxy.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/endpoints.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/endpoints.yaml new file mode 100644 index 0000000000..6236b42f10 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/endpoints.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-kube-scheduler + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + k8s-app: kube-scheduler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeScheduler.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- $kubeSchedulerDefaultInsecurePort := 10251 }} + {{- $kubeSchedulerDefaultSecurePort := 10259 }} + port: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.port) }} + protocol: TCP +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/service.yaml new file mode 100644 index 0000000000..90b3a800a4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/service.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-scheduler + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + jobLabel: kube-scheduler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + ports: + - name: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- $kubeSchedulerDefaultInsecurePort := 10251 }} + {{- $kubeSchedulerDefaultSecurePort := 10259 }} + port: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.port) }} + protocol: TCP + targetPort: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . 
$kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.targetPort) }} +{{- if .Values.kubeScheduler.endpoints }}{{- else }} + selector: + {{- if .Values.kubeScheduler.service.selector }} +{{ toYaml .Values.kubeScheduler.service.selector | indent 4 }} + {{- else}} + component: kube-scheduler + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/servicemonitor.yaml new file mode 100644 index 0000000000..b17c4f1d47 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-scheduler/servicemonitor.yaml @@ -0,0 +1,69 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-scheduler + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + {{- with .Values.kubeScheduler.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeScheduler.serviceMonitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeScheduler.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeScheduler.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-kube-scheduler + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- if .Values.kubeScheduler.serviceMonitor.interval }} + interval: {{ .Values.kubeScheduler.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeScheduler.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeScheduler.serviceMonitor.proxyUrl}} + {{- end }} + {{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . false true .Values.kubeScheduler.serviceMonitor.https )) "true" }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . nil true .Values.kubeScheduler.serviceMonitor.insecureSkipVerify)) "true" }} + insecureSkipVerify: true + {{- end }} + {{- if .Values.kubeScheduler.serviceMonitor.serverName }} + serverName: {{ .Values.kubeScheduler.serviceMonitor.serverName }} + {{- end}} + {{- end}} + metricRelabelings: + {{- if .Values.kubeScheduler.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.metricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubeScheduler.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.relabelings | indent 4) . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-state-metrics/validate.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-state-metrics/validate.yaml new file mode 100644 index 0000000000..9211b3d771 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kube-state-metrics/validate.yaml @@ -0,0 +1,7 @@ +{{- if .Values.kubeStateMetrics.enabled }} +{{- if not (kindIs "invalid" .Values.kubeStateMetrics.serviceMonitor) }} +{{- if .Values.kubeStateMetrics.serviceMonitor.namespaceOverride }} +{{- fail "kubeStateMetrics.serviceMonitor.namespaceOverride was removed. Please use kube-state-metrics.namespaceOverride instead." }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kubelet/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kubelet/servicemonitor.yaml new file mode 100644 index 0000000000..f570fbfdbc --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/kubelet/servicemonitor.yaml @@ -0,0 +1,246 @@ +{{- if (and (not .Values.kubelet.enabled) .Values.hardenedKubelet.enabled) }} +{{ required "Cannot set .Values.hardenedKubelet.enabled=true when .Values.kubelet.enabled=false" "" }} +{{- end }} +{{- if (and .Values.kubelet.enabled .Values.kubernetesServiceMonitors.enabled (not .Values.hardenedKubelet.enabled) (not .Values.k3sServer.enabled)) }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kubelet + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: {{ .Values.kubelet.namespace }} + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . 
}}-kubelet + {{- with .Values.kubelet.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.kubelet.serviceMonitor | nindent 2 }} + {{- with .Values.kubelet.serviceMonitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + endpoints: + {{- if .Values.kubelet.serviceMonitor.https }} + - port: https-metrics + scheme: https + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + metricRelabelings: + {{- if .Values.kubelet.serviceMonitor.metricRelabelings }} + {{- tpl (toYaml .Values.kubelet.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName }} + {{- end }} +{{- if .Values.kubelet.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4) . 
}} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisor }} + - port: https-metrics + scheme: https + path: /metrics/cadvisor + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token +{{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisorRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4) . 
}} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probes }} + - port: https-metrics + scheme: https + path: /metrics/probes + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token +{{- if .Values.kubelet.serviceMonitor.probesMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probesRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesRelabelings | indent 4) . }} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resource }} + - port: https-metrics + scheme: https + path: {{ include "kubelet.serviceMonitor.resourcePath" . 
}} + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token +{{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resourceRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4) . }} +{{- end }} +{{- end }} + {{- else }} + - port: http-metrics + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} +{{- if .Values.kubelet.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4) . 
}} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisor }} + - port: http-metrics + path: /metrics/cadvisor + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} +{{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisorRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probes }} + - port: http-metrics + path: /metrics/probes + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} +{{- if .Values.kubelet.serviceMonitor.probesMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probesRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesRelabelings | indent 4) . 
}} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resource }} + - port: http-metrics + path: {{ include "kubelet.serviceMonitor.resourcePath" . }} + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} +{{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resourceRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4) . 
}} +{{- end }} +{{- end }} +{{- end }} + {{- end }} + jobLabel: k8s-app + namespaceSelector: + matchNames: + - {{ .Values.kubelet.namespace }} + selector: + matchLabels: + app.kubernetes.io/name: kubelet + k8s-app: kubelet +{{- end}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/node-exporter/validate.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/node-exporter/validate.yaml new file mode 100644 index 0000000000..bdc73d6165 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/exporters/node-exporter/validate.yaml @@ -0,0 +1,3 @@ +{{- if (and (not .Values.nodeExporter.enabled) .Values.hardenedNodeExporter.enabled) }} +{{ required "Cannot set .Values.hardenedNodeExporter.enabled=true when .Values.nodeExporter.enabled=false" "" }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/extra-objects.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/extra-objects.yaml new file mode 100644 index 0000000000..567f7bf329 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/extra-objects.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl (toYaml .) 
$ }} +{{ end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmap-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmap-dashboards.yaml new file mode 100644 index 0000000000..e719009ffe --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmap-dashboards.yaml @@ -0,0 +1,24 @@ +{{- if or (and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled) .Values.grafana.forceDeployDashboards }} +{{- $files := .Files.Glob "dashboards-1.14/*.json" }} +{{- if $files }} +apiVersion: v1 +kind: ConfigMapList +items: +{{- range $path, $fileContents := $files }}{{- /* inside this range "." is the current file's contents, so chart values must be reached through "$" */}} +{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }} +- apiVersion: v1 + kind: ConfigMap + metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $dashboardName | trunc 63 | trimSuffix "-" }} + namespace: {{ $.Values.grafana.defaultDashboards.namespace }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 6 }} + data: + {{ $dashboardName }}.json: {{ $.Files.Get $path | toJson }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmaps-datasources.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmaps-datasources.yaml new file mode 100644 index 0000000000..718020d4f6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/configmaps-datasources.yaml @@ -0,0 +1,81 @@ +{{- if or (and .Values.grafana.enabled .Values.grafana.sidecar.datasources.enabled) .Values.grafana.forceDeployDatasources }} +apiVersion: v1 
+kind: ConfigMap +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource + namespace: {{ default .Values.grafana.sidecar.datasources.searchNamespace (include "kube-prometheus-stack.namespace" .) }} +{{- if .Values.grafana.sidecar.datasources.annotations }} + annotations: + {{- toYaml .Values.grafana.sidecar.datasources.annotations | nindent 4 }} +{{- end }} + labels: + {{ $.Values.grafana.sidecar.datasources.label }}: {{ $.Values.grafana.sidecar.datasources.labelValue | quote }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + datasource.yaml: |- + apiVersion: 1 +{{- if .Values.grafana.deleteDatasources }} + deleteDatasources: +{{ tpl (toYaml .Values.grafana.deleteDatasources | indent 6) . }} +{{- end }} + datasources: +{{- $scrapeInterval := .Values.grafana.sidecar.datasources.defaultDatasourceScrapeInterval | default .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }} +{{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }} + - name: Prometheus + type: prometheus + uid: {{ .Values.grafana.sidecar.datasources.uid }} + {{- if .Values.grafana.sidecar.datasources.url }} + url: {{ .Values.grafana.sidecar.datasources.url }} + {{- else }} + url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . 
}}:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }} + {{- end }} + access: proxy + isDefault: {{ .Values.grafana.sidecar.datasources.isDefaultDatasource }} + jsonData: + httpMethod: {{ .Values.grafana.sidecar.datasources.httpMethod }} + timeInterval: {{ $scrapeInterval }} + {{- if .Values.grafana.sidecar.datasources.timeout }} + timeout: {{ .Values.grafana.sidecar.datasources.timeout }} + {{- end }} +{{- if .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations }} + exemplarTraceIdDestinations: + - datasourceUid: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }} + name: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }} +{{- end }} +{{- if .Values.grafana.sidecar.datasources.createPrometheusReplicasDatasources }} +{{- range until (int .Values.prometheus.prometheusSpec.replicas) }} + - name: Prometheus-{{ . }} + type: prometheus + uid: {{ $.Values.grafana.sidecar.datasources.uid }}-replica-{{ . }} + url: http://prometheus-{{ template "kube-prometheus-stack.prometheus.crname" $ }}-{{ . 
}}.prometheus-operated:9090/{{ trimPrefix "/" $.Values.prometheus.prometheusSpec.routePrefix }} + access: proxy + isDefault: false + jsonData: + timeInterval: {{ $scrapeInterval }} +{{- if $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations }} + exemplarTraceIdDestinations: + - datasourceUid: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }} + name: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }} +{{- end }} +{{- end }} +{{- end }} +{{- if .Values.grafana.sidecar.datasources.alertmanager.enabled }} + - name: Alertmanager + type: alertmanager + uid: {{ .Values.grafana.sidecar.datasources.alertmanager.uid }} + {{- if .Values.grafana.sidecar.datasources.alertmanager.url }} + url: {{ .Values.grafana.sidecar.datasources.alertmanager.url }} + {{- else }} + url: http://{{ template "kube-prometheus-stack.fullname" . }}-alertmanager.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.alertmanager.service.port }}/{{ trimPrefix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }} + {{- end }} + access: proxy + jsonData: + handleGrafanaManagedAlerts: {{ .Values.grafana.sidecar.datasources.alertmanager.handleGrafanaManagedAlerts }} + implementation: {{ .Values.grafana.sidecar.datasources.alertmanager.implementation }} +{{- end }} +{{- end }} +{{- if .Values.grafana.additionalDataSources }} +{{ tpl (toYaml .Values.grafana.additionalDataSources | indent 4) . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/alertmanager-overview.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/alertmanager-overview.yaml new file mode 100644 index 0000000000..dfc26d7ecd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/alertmanager-overview.yaml @@ -0,0 +1,616 @@ +{{- /* +Generated from 'alertmanager-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + 
alertmanager-overview.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "current set of alerts stored in the Alertmanager", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Alerts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + 
"description": "rate of successful and invalid alerts received by the Alertmanager", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Received", + "refId": "A" + }, + { + "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Invalid", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Alerts receive rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { 
+ "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "rate of successful and invalid notifications sent by the Alertmanager", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "integration", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Total", + "refId": "A" + }, + { + "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Failed", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "$integration: Notifications Send Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { 
+ "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "latency of notifications sent by the Alertmanager", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "integration", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} 99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Median", + "refId": "B" + }, + { + "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Average", 
+ "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "$integration: Notification Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "alertmanager-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(alertmanager_alerts, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "service", + "multi": false, + "name": "service", + "options": [ + + ], + "query": "label_values(alertmanager_alerts, service)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + 
"text": "all", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "integration", + "options": [ + + ], + "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Alertmanager / Overview", + "uid": "alertmanager-overview", + "version": 0 + } +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/apiserver.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/apiserver.yaml new file mode 100644 index 0000000000..bd1048b567 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/apiserver.yaml @@ -0,0 +1,1772 @@ +{{- /* +Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + apiserver.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "datasource": null, + "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "mode": "markdown", + "span": 12, + "title": "Notice", + "type": "text" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + 
"panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 3, + "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Availability (30d) > 99.000%", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 3, + "description": "How much error budget is left looking at our 0.990% availability guarantees?", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + 
"avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "errorbudget", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ErrorBudget (30d) > 99.000%", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "decimals": 3, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 3, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 3, + "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { 
+ + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Read Availability (30d)", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/2../i", + "color": "#56A64B" + }, + { + "alias": "/3../i", + "color": "#F2CC0C" + }, + { + "alias": "/4../i", + "color": "#3274D9" + }, + { + 
"alias": "/5../i", + "color": "#E02F44" + } + ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} code {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Read SLI - Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} resource {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Read SLI - Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cluster_quantile:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} resource {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Read SLI - Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": 
{ + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 3, + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Write Availability (30d)", + 
"tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/2../i", + "color": "#56A64B" + }, + { + "alias": "/3../i", + "color": "#F2CC0C" + }, + { + "alias": "/4../i", + "color": "#3274D9" + }, + { + "alias": "/5../i", + "color": "#E02F44" + } + ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} code {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Write SLI - Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { 
+ "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} resource {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Write SLI - Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", + "fill": 1, + 
"fillGradient": 0, + "gridPos": { + + }, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cluster_quantile:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} resource {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Write SLI - Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + 
"show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Add Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 14, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(workqueue_depth{job=\"apiserver\", 
instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Depth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 15, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + 
}, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 16, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 
1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 17, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 18, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 
1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"apiserver\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + 
"type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / API server", + "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/cluster-total.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/cluster-total.yaml new file mode 100644 index 0000000000..f4be0bbd45 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/cluster-total.yaml @@ -0,0 +1,1882 @@ +{{- /* +Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + cluster-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + 
"rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "Value #G", + "value": "Value #G" + }, + { + "text": "Value #H", + "value": "Value #H" + }, + { + "text": "namespace", + "value": "namespace" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 
10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + 
"thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?orgId=1&refresh=30s&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": 
"sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": 
true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth History", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 10, + 
"legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + 
"expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + 
"title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 15, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 50 + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + 
"tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": 
"pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "What is TCP Retransmit?", + "url": "https://accedian.com/enterprises/blog/network-packet-loss-retransmissions-and-duplicate-acknowledgements/" + } + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of TCP Retransmits out of all sent segments", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } 
+ ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "Why monitor SYN retransmits?", + "url": "https://github.com/prometheus/node_exporter/issues/1023#issuecomment-408128365" + } + ], + "minSpan": 24, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of TCP SYN Retransmits out of all retransmits", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": 
true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Networking / Cluster", + "uid": "ff635a025bcfea7bc3dd4f508990a3e9", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/controller-manager.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/controller-manager.yaml new file mode 100644 index 0000000000..8d420d7a4f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/controller-manager.yaml @@ -0,0 +1,1196 @@ +{{- /* +Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeControllerManager.enabled" .)}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + controller-manager.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": "1m", + "legend": { + 
"alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + {{- if .Values.k3sServer.enabled }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"})", + {{- end }} + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Add Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Depth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Work Queue Latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": 
false, + "targets": [ + { + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\",instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Controller Manager", + "uid": "72e0e05bef5099e5f049b05fdc429ed4", + "version": 0 + } +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/etcd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/etcd.yaml new file mode 100644 index 0000000000..0eeedc6299 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/etcd.yaml @@ -0,0 +1,1229 @@ +{{- /* +Generated from 'etcd' from https://github.com/etcd-io/etcd.git +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeEtcd.enabled" .)}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + etcd.json: |- + { + "annotations": { + "list": [] + }, + "description": "etcd sample Grafana dashboard with Prometheus", + "editable": true, + "gnetId": null, + "hideControls": false, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$datasource", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "isNew": true, + 
"links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "sum(etcd_server_has_leader{job=\"$cluster\"})", + "intervalFactor": 2, + "legendFormat": "", + "metric": "etcd_server_has_leader", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Up", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 0, + "id": 23, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(grpc_server_started_total{job=\"$cluster\",grpc_type=\"unary\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RPC Rate", + "metric": "grpc_server_started_total", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(grpc_server_handled_total{job=\"$cluster\",grpc_type=\"unary\",grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[$__rate_interval]))", + "format": "time_series", 
+ "intervalFactor": 2, + "legendFormat": "RPC Failed Rate", + "metric": "grpc_server_handled_total", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 0, + "id": 41, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(grpc_server_started_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})", + "intervalFactor": 2, + "legendFormat": "Watch Streams", + "metric": "grpc_server_handled_total", + "refId": "A", + "step": 4 + }, + { + "expr": "sum(grpc_server_started_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})", + "intervalFactor": 2, + "legendFormat": "Lease Streams", + "metric": "grpc_server_handled_total", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Active Streams", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "title": "Row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "decimals": null, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} DB Size", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "DB Size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": 
"$datasource", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=\"$cluster\"}[$__rate_interval])) by (instance, le))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} WAL fsync", + "metric": "etcd_disk_wal_fsync_duration_seconds_bucket", + "refId": "A", + "step": 4 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=\"$cluster\"}[$__rate_interval])) by (instance, le))", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} DB fsync", + "metric": "etcd_disk_backend_commit_duration_seconds_bucket", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Sync Duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 0, + "id": 29, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"$cluster\"}", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Resident Memory", + "metric": "process_resident_memory_bytes", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 5, + "id": 22, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(etcd_network_client_grpc_received_bytes_total{job=\"$cluster\"}[$__rate_interval])", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Client Traffic In", + "metric": "etcd_network_client_grpc_received_bytes_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Client Traffic In", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 5, + "id": 21, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(etcd_network_client_grpc_sent_bytes_total{job=\"$cluster\"}[$__rate_interval])", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Client Traffic Out", + "metric": "etcd_network_client_grpc_sent_bytes_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Traffic Out", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 0, + "id": 20, + "isNew": true, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_network_peer_received_bytes_total{job=\"$cluster\"}[$__rate_interval])) by (instance)", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Peer Traffic In", + "metric": "etcd_network_peer_received_bytes_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Peer Traffic In", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "decimals": null, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_network_peer_sent_bytes_total{job=\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{`{{`}}instance{{`}}`}} Peer Traffic Out", + "metric": "etcd_network_peer_sent_bytes_total", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Peer Traffic Out", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 0, + "id": 40, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(etcd_server_proposals_failed_total{job=\"$cluster\"}[$__rate_interval]))", + "intervalFactor": 2, + "legendFormat": "Proposal Failure Rate", + "metric": "etcd_server_proposals_failed_total", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(etcd_server_proposals_pending{job=\"$cluster\"})", + "intervalFactor": 2, + "legendFormat": "Proposal Pending Total", + "metric": "etcd_server_proposals_pending", + "refId": "B", + "step": 2 + }, + { + "expr": "sum(rate(etcd_server_proposals_committed_total{job=\"$cluster\"}[$__rate_interval]))", + "intervalFactor": 2, + "legendFormat": "Proposal Commit Rate", + "metric": "etcd_server_proposals_committed_total", + "refId": "C", + 
"step": 2 + }, + { + "expr": "sum(rate(etcd_server_proposals_applied_total{job=\"$cluster\"}[$__rate_interval]))", + "intervalFactor": 2, + "legendFormat": "Proposal Apply Rate", + "refId": "D", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Raft Proposals", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$datasource", + "decimals": 0, + "editable": true, + "error": false, + "fill": 0, + "id": 19, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "changes(etcd_server_leader_changes_seen_total{job=\"$cluster\"}[1d])", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Total Leader Elections Per Day", + "metric": "etcd_server_leader_changes_seen_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Leader Elections Per Day", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + 
"label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 42, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=\"$cluster\"}[$__rate_interval])))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Peer round trip time", + "metric": "etcd_network_peer_round_trip_time_seconds_bucket", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Peer round trip time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:925", + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { 
+ "$$hashKey": "object:926", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "New row" + } + ], + "schemaVersion": 13, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "etcd-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": {{ if (or .Values.grafana.sidecar.dashboards.multicluster.global.enabled .Values.grafana.sidecar.dashboards.multicluster.etcd.enabled) }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(etcd_server_has_leader, job)", + "refresh": 2, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "etcd", + "uid": "c2f4e12cdf69feb95caa41a5a1b423d9", + "version": 215 + } +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/grafana-overview.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/grafana-overview.yaml new file mode 100644 index 0000000000..d2609140cf --- /dev/null +++ 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/grafana-overview.yaml @@ -0,0 +1,635 @@ +{{- /* +Generated from 'grafana-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "grafana-overview" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + grafana-overview.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + + ], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 3085, + "iteration": 
1631554945276, + "links": [ + + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [ + + ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": { + + }, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Firing Alerts", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": { + + }, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dashboards", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto" + }, + "mappings": [ + + 
], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 10, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Build Info", + "transformations": [ + { + "id": "labelsToFields", + "options": { + + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "branch": true, + "container": true, + "goversion": true, + "namespace": true, + "pod": true, + "revision": true + }, + "indexByName": { + "Time": 7, + "Value": 11, + "branch": 4, + "container": 8, + "edition": 2, + "goversion": 6, + "instance": 1, + "job": 0, + "namespace": 9, + "pod": 10, + "revision": 5, + "version": 3 + }, + "renameByName": { + + } + } + } + ], + "type": "table" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ] + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status_code) 
(irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ", + "interval": "", + "legendFormat": "{{`{{`}}status_code{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "RPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "$$hashKey": "object:157", + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:158", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ] + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.50, 
sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1", + "interval": "", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Average", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Request Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "$$hashKey": "object:210", + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:211", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 30, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": [ + "default/grafana" + ], + "value": [ + "default/grafana" + ] + }, + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": 
null, + "multi": true, + "name": "job", + "options": [ + + ], + "query": { + "query": "label_values(grafana_build_info, job)", + "refId": "Billing Admin-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "definition": "label_values(grafana_build_info, instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "instance", + "options": [ + + ], + "query": { + "query": "label_values(grafana_build_info, instance)", + "refId": "Billing Admin-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Grafana Overview", + "uid": "6be0s85Mk", + "version": 2 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-coredns.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-coredns.yaml new file mode 100644 index 0000000000..7ecca76f23 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-coredns.yaml @@ -0,0 +1,1534 @@ +{{- /* +Generated from 'k8s-coredns' from ../files/dashboards/k8s-coredns.json +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.coreDns.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-coredns.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. 
Based on the CoreDNS dashboard by buhay.", + "editable": true, + "gnetId": 12539, + "graphTooltip": 0, + "iteration": 1603798405693, + "links": [ + { + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "CoreDNS.io", + "type": "link", + "url": "https://coredns.io" + } + ], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (proto) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (proto)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (total)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + }, + { + "alias": "other", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_dns_request_type_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type) or \nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{type}}"}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (by qtype)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + 
"show": true + }, + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (zone) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (zone)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{zone}}"}}", + "refId": "A", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (by zone)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_dns_request_do_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_do_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "DO", + "refId": "A", + "step": 40 + }, + { + "expr": "sum(rate(coredns_dns_request_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_requests_total{job=\"coredns\",instance=~\"$instance\"}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "total", + "refId": "B", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (DO bit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { 
+ "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 7 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "tcp:90", + "yaxis": 2 + }, + { + "alias": "tcp:99 ", + "yaxis": 2 + }, + { + "alias": "tcp:50", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:99 ", + "refId": "A", + "step": 60 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:90", + "refId": "B", + "step": 60 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:50", + "refId": "C", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (size, udp)", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 7 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "tcp:90", + "yaxis": 1 + }, + { + "alias": "tcp:99 ", + "yaxis": 1 + }, + { + "alias": "tcp:50", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:99 ", + "refId": "A", + "step": 60 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{"{{proto}}"}}:90", + "refId": "B", + "step": 60 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:50", + "refId": "C", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests (size,tcp)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_dns_response_rcode_count_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (rcode) 
or\nsum(rate(coredns_dns_responses_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (rcode)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{rcode}}"}}", + "refId": "A", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Responses (by rcode)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le, job))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99%", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.90, 
sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "90%", + "refId": "B", + "step": 40 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_seconds_bucket{job=\"coredns\",instance=~\"$instance\"}[5m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50%", + "refId": "C", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Responses (duration)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "udp:50%", + "yaxis": 1 + }, + { + "alias": "tcp:50%", + "yaxis": 2 + }, + { + "alias": "tcp:90%", + "yaxis": 2 + }, + { + "alias": 
"tcp:99%", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:99%", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:90%", + "refId": "B", + "step": 40 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:50%", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Responses (size, udp)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + 
"max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "udp:50%", + "yaxis": 1 + }, + { + "alias": "tcp:50%", + "yaxis": 1 + }, + { + "alias": "tcp:90%", + "yaxis": 1 + }, + { + "alias": "tcp:99%", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:99%", + "refId": "A", + "step": 40 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:90%", + "refId": "B", + "step": 40 + }, + { + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{job=\"coredns\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{"{{proto}}"}}:50%", + "metric": "", + "refId": "C", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Responses (size, tcp)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + 
"show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(coredns_cache_size{job=\"coredns\",instance=~\"$instance\"}) by (type) or\nsum(coredns_cache_entries{job=\"coredns\",instance=~\"$instance\"}) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{"{{type}}"}}", + "refId": "A", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cache (size)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "misses", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(coredns_cache_hits_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "hits:{{"{{type}}"}}", + "refId": "A", + "step": 40 + }, + { + "expr": "sum(rate(coredns_cache_misses_total{job=\"coredns\",instance=~\"$instance\"}[5m])) by (type)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "misses", + "refId": "B", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cache (hitrate)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 26, + "style": "dark", + "tags": [ + "dns", + "coredns" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Data Source", + 
"multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "definition": "label_values(up{job=\"coredns\"}, instance)", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(up{job=\"coredns\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "CoreDNS", + "uid": "vkQ0UHxik", + "version": 2 + } +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml new file mode 100644 index 0000000000..93ee57db93 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml @@ -0,0 +1,3088 @@ +{{- /* +Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Requests Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + 
"type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 
1, + "format": "percentunit", + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Limits Commitment", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": 
"individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to workloads", + "linkUrl": "d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": 
"", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": 
"d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to workloads", + "linkUrl": "d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", 
+ "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests by Namespace", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Requests", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": 
false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + 
"linkTooltip": "Drill down to pods", + "linkUrl": "d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + 
"min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 14, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "avg(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Received", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "avg(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Namespace: Transmitted", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Container Bandwidth by Namespace", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 16, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 17, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 18, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 19, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": -1, + "fill": 10, + "id": 20, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + 
rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS(Reads+Writes)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 21, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + 
"legendFormat": "{{`{{`}}namespace{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ThroughPut(Read+Write)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 22, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "sort": { + "col": 4, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Writes)", + "colorMode": null, + 
"colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + 
"alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + 
"legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Storage IO", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": 
"Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml new file mode 100644 index 0000000000..9c295831a5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 'k8s-resources-multicluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-multicluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-multicluster.json: |- + {{`{"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"links":[],"refresh":"10s","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":1,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"cluster:node_cpu:ratio_rate5m","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests 
Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests 
Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits Commitment","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":7,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":8,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests 
%","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end }}{{`","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by 
(cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":0,"id":9,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":2,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"time_series","legendFormat":"{{cluster}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (w/o 
cache)","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"interval":"1m","legend":{"alignAsTable":true,"avg":false,"current":false,"max":false,"min":false,"rightSide":true,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill 
down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"bytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Cluster","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/efa86fd1d0c121a26444b636a3f509a8/k8s-resources-cluster?var-datasource=$datasource&var-cluster=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end }}{{`","pattern":"cluster","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by 
(cluster)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Cluster","tooltip":{"shared":false,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Multi-Cluster","uid":"b59e6c9f2fcbe2e16d77fc492374cc4f","version":0}`}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml new file mode 100644 index 0000000000..1c32c9c02e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml @@ -0,0 +1,2797 @@ +{{- /* +Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, 
+ "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from requests)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", 
namespace=\"$namespace\", resource=\"cpu\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation (from requests)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation (from limits)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": 
true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": 
"percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + 
"hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "interval": "1m", + "legend": { + 
"alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + 
"linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, 
+ "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], 
+ "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + 
"repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + 
"links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 14, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 
null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": -1, + "fill": 10, + "id": 16, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS(Reads+Writes)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 17, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ThroughPut(Read+Write)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 18, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "sort": { + "col": 4, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": 
"short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + 
"legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", 
metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Storage IO", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": 
"label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-node.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-node.yaml new file mode 100644 index 0000000000..e60a42d747 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-node.yaml @@ -0,0 +1,1026 @@ +{{- /* +Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-node.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + { + "alias": "max capacity", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "max capacity", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, 
+ "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": 
null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "max capacity", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "max capacity", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": 
"{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + 
"linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by 
(pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], 
+ "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml new file mode 100644 index 0000000000..80fab51c00 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml @@ -0,0 +1,2469 @@ +{{- /* 
+Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "requests", + "color": "#F2495C", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}container{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}container{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.25, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Throttling", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Throttling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + 
"unit": "short" + } + ], + "targets": [ + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + 
"type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}container{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (WSS)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage (WSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + 
"alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_cache{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(container_memory_swap{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": 
false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": -1, + "fill": 10, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\",
pod=~\"$pod\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",
namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Reads", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Writes", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ThroughPut", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Pod - Read & Writes)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": -1, + "fill": 10, + "id": 14, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, +
"steppedLine": false, + "targets": [ + { + "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}container{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS(Reads+Writes)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 15, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}container{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "ThroughPut(Read+Write)", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution(Containers)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 16, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "sort": { + "col": 4, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "IOPS(Reads)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS(Reads + Writes)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": -1, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput(Read)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Throughput(Read + Write)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + 
{ + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ {{- /* The job label in every query below is taken from the exporter.kubelet.jobName helper, the same as the IOPS and ThroughPut graph panels of this dashboard, instead of a hard-coded value. */ -}} + { + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, 
+ "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Storage IO", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage IO - Distribution", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", 
cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml new file mode 100644 index 0000000000..d77170afd8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 'k8s-resources-windows-cluster' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-cluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-windows-cluster.json: |- + {{`{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"","rows":[{"collapse":false,"height":"100px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode=\"idle\"}[1m]))","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU 
Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Requests Commitment","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / 
sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"CPU Limits Commitment","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Requests Commitment","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"format":"percentunit","id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":2,"stack":false,"steppedLine":false,"targets":[{"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","format":"time_series","instant":true,"refId":"A"}],"thresholds":"70,80","timeFrom":null,"timeShift":null,"title":"Memory Limits 
Commitment","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"singlestat","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":false,"title":"Headlines","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?var-datasource=$datasource&var-namespace=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end 
}}{{`","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU 
Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","format":"time_series","legendFormat":"{{namespace}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage (Private Working Set)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value 
#A","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Namespace","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/490b402361724ab1d4c45666c1fa9b6f/k8s-resources-windows-namespace?var-datasource=$datasource&var-namespace=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end }}{{`","pattern":"namespace","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by 
(namespace)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Requests by Namespace","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Requests","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":null,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"windows-exporter\"}, 
cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Cluster(Windows)","uid":"4d08557fd9391b100730f2494bccac68","version":0}`}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml new file mode 100644 index 0000000000..13a1fc3abd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 'k8s-resources-windows-namespace' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-windows-namespace.json: |- + {{`{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value 
#D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end }}{{`","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by 
(pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"time_series","legendFormat":"{{pod}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"decbytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory 
Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Pod","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":2,"link":true,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"/d/40597a704a610e936dc6ed374a7ce023/k8s-resources-windows-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=`}}{{ if .Values.grafana.sidecar.dashboards.enableNewTablePanelSyntax }}${__value.text}{{ else }}$__cell{{ end }}{{`","pattern":"pod","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Quota","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":null,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace(Windows)","uid":"490b402361724ab1d4c45666c1fa9b6f","version":0}`}} +{{- end }} \ No newline at end of file diff --git 
a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml new file mode 100644 index 0000000000..6686e54053 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 'k8s-resources-windows-pod' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-windows-pod.json: |- + {{`{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU 
Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"CPU Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"CPU Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value 
#D","thresholds":[],"type":"number","unit":"short"},{"alias":"CPU Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", 
namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Quota","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"time_series","legendFormat":"{{container}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Usage","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory 
Usage","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"styles":[{"alias":"Time","dateFormat":"YYYY-MM-DD HH:mm:ss","pattern":"Time","type":"hidden"},{"alias":"Memory Usage","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #A","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #B","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Requests %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #C","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Memory Limits","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #D","thresholds":[],"type":"number","unit":"decbytes"},{"alias":"Memory Limits %","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"Value #E","thresholds":[],"type":"number","unit":"percentunit"},{"alias":"Container","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD 
HH:mm:ss","decimals":2,"link":false,"linkTargetBlank":false,"linkTooltip":"Drill down","linkUrl":"","pattern":"container","thresholds":[],"type":"number","unit":"short"},{"alias":"","colorMode":null,"colors":[],"dateFormat":"YYYY-MM-DD HH:mm:ss","decimals":2,"pattern":"/.*/","thresholds":[],"type":"string","unit":"short"}],"targets":[{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"A"},{"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"B"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"C"},{"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"D"},{"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true,"legendFormat":"","refId":"E"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Quota","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"transform":"table","type":"table","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory Quota","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"id":6,"legend":{"alignAsTable":true,"avg":true,"current":true,"max":false,"min":false,"rightSide":true,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[1m])))","format":"time_series","intervalFactor":2,"legendFormat":"Received : {{ container }}","refId":"A"},{"expr":"sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[1m])))","format":"time_series","intervalFactor":2,"legendFormat":"Transmitted : {{ container }}","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Network 
I/O","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":0,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network I/O","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":null,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Namespace","multi":false,"name":"namespace","options":[],"query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Pod","multi":false,"name":"pod","options":[],"query":"label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, 
pod)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Pod(Windows)","uid":"40597a704a610e936dc6ed374a7ce023","version":0}`}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml new file mode 100644 index 0000000000..e2a63ae208 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml @@ -0,0 +1,2024 @@ +{{- /* +Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + 
"alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": 
"d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, 
+ "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + 
"colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n 
kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": 
"Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": 
null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Received", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Pod: Transmitted", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Container Bandwidth by Pod", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_total{job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + 
"tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Workload", + "uid": 
"a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml new file mode 100644 index 0000000000..95d758ea2d --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml @@ -0,0 +1,2189 @@ +{{- /* +Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | 
indent 4 }} +data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": 
false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": 
"d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + 
"intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "quota - requests", + "color": "#F2495C", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + }, + { + "alias": "quota - limits", + "color": "#FF9830", + "dashes": true, + "fill": 0, + "hiddenSeries": true, + "hideTooltip": true, + "legend": true, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "span": 
12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}} - {{`{{`}}workload_type{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - requests", + "legendLink": null, + "step": 10 + }, + { + "expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "quota - limits", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + 
"colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n container_memory_working_set_bytes{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null 
as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Current Receive Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Transmit Bandwidth", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted 
Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Drill down to pods", + "linkUrl": "d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Network Usage", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Network Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + 
"max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Received", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Container Bandwidth by Workload: Transmitted", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Container Bandwidth by Workload", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"(sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 13, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": false, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rate of Packets Dropped", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + 
"tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml new file mode 100644 index 0000000000..d9ce9d738c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 'k8s-windows-cluster-rsrc-use' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . 
}} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-windows-cluster-rsrc-use.json: |- + {{`{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\"} * node:windows_node_num_cpu:sum{cluster=\"$cluster\"} / scalar(sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"}))","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU 
Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_memory_utilisation:ratio{cluster=\"$cluster\"}","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\"}","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Saturation (Swap I/O Pages)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":5,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(node:windows_node:sum{cluster=\"$cluster\"})","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO 
Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\"}","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Net Utilisation (Transmitted)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\"}","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Net Saturation (Dropped)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Network","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":10,"id":8,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":0,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":true,"steppedLine":false,"targets":[{"expr":"sum by (instance)(node:windows_node_filesystem_usage:{cluster=\"$cluster\"})\n","format":"time_series","legendFormat":"{{instance}}","legendLink":"/d/96e7484b0bb53b74fbc2bcb7723cd40b/k8s-windows-node-rsrc-use"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
Capacity","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":1,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Storage","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":null,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / USE Method / Cluster(Windows)","uid":"53a43377ec9aaf2ff64dfc7a1f539334","version":0}`}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml new file mode 100644 index 0000000000..a7608496a3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml @@ -0,0 +1,24 @@ +{{- /* +Generated from 
'k8s-windows-node-rsrc-use' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-node-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + k8s-windows-node-rsrc-use.json: |- + {{`{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"gnetId":null,"graphTooltip":0,"hideControls":false,"id":null,"links":[],"refresh":"","rows":[{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":2,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Utilisation","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":3,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"sum by (core) (irate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode!=\"idle\", instance=\"$instance\"}[$__rate_interval]))","format":"time_series","legendFormat":"{{core}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"CPU Usage Per 
Core","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"CPU","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":4,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_memory_utilisation:{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Memory","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Utilisation 
%","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"max(\n windows_os_visible_memory_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}\n - windows_memory_available_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"memory used","refId":"A"},{"expr":"max(node:windows_node_memory_totalCached_bytes:sum{cluster=\"$cluster\", instance=\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"memory cached","refId":"B"},{"expr":"max(windows_memory_available_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"})","format":"time_series","intervalFactor":2,"legendFormat":"memory free","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory 
Usage","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":6,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":4,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Swap IO","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Memory Saturation (Swap I/O) Pages","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"short","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Memory","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":7,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Utilisation","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk IO Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"fillGradient":0,"id":8,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":true,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[{"alias":"read","yaxis":1},{"alias":"io time","yaxis":2}],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"max(rate(windows_logical_disk_read_bytes_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[2m]))","format":"time_series","intervalFactor":2,"legendFormat":"read","refId":"A"},{"expr":"max(rate(windows_logical_disk_write_bytes_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[2m]))","format":"time_series","intervalFactor":2,"legendFormat":"written","refId":"B"},{"expr":"max(rate(windows_logical_disk_read_seconds_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[2m]) + rate(windows_logical_disk_write_seconds_total{cluster=\"$cluster\", job=\"windows-exporter\", 
instance=\"$instance\"}[2m]))","format":"time_series","intervalFactor":2,"legendFormat":"io time","refId":"C"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk I/O","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"bytes","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ms","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":9,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Utilisation","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Net Utilisation (Transmitted)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":10,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as 
zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":6,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}","format":"time_series","legendFormat":"Saturation","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Net Saturation (Dropped)","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"Bps","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Net","titleSize":"h6"},{"collapse":false,"height":"250px","panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","fill":1,"id":11,"legend":{"avg":false,"current":false,"max":false,"min":false,"show":true,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null as zero","percentage":false,"pointradius":5,"points":false,"renderer":"flot","seriesOverrides":[],"spaceLength":10,"span":12,"stack":false,"steppedLine":false,"targets":[{"expr":"node:windows_node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}\n","format":"time_series","legendFormat":"{{volume}}","legendLink":null}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Disk 
Utilisation","tooltip":{"shared":true,"sort":2,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"percentunit","label":null,"logBase":1,"max":null,"min":0,"show":true},{"format":"short","label":null,"logBase":1,"max":null,"min":null,"show":false}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Disk","titleSize":"h6"}],"schemaVersion":14,"style":"dark","tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"text":"default","value":"default"},"hide":0,"label":null,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":null,"current":{},"datasource":"$datasource","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":false,"label":"cluster","multi":false,"name":"cluster","options":[],"query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false},{"allValue":null,"current":{},"datasource":"$datasource","hide":0,"includeAll":false,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(windows_system_system_up_time{cluster=\"$cluster\"}, instance)","refresh":2,"regex":"","sort":1,"tagValuesQuery":"","tags":[],"tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["5s","10s","30s","1m","5m","15m","30m","1h","2h","1d"],"time_options":["5m","15m","1h","6h","12h","24h","2d","7d","30d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / USE Method / Node(Windows)","uid":"96e7484b0bb53b74fbc2bcb7723cd40b","version":0}`}} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/kubelet.yaml 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/kubelet.yaml new file mode 100644 index 0000000000..74a5303f8f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/kubelet.yaml @@ -0,0 +1,2256 @@ +{{- /* +Generated from 'kubelet' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubelet.enabled" .) 
}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + kubelet.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Kubelets", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "title": "Running Pods", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "title": "Running Containers", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "title": "Actual Volume Count", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 6, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "title": "Desired Volume Count", + "transparent": false, + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "links": [ + + ], + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 7, + "links": [ + + ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "title": "Config Error Count", + "transparent": false, + "type": "stat" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Operation duration 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} pod", + "refId": "A" + }, + { + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} pod", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} worker", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pod Start Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_name{{`}}`}} {{`{{`}}volume_plugin{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Storage Operation Duration 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}operation_type{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager operation rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}operation_type{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Cgroup manager 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Pod lifecycle event generator", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist interval", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "PLEG relist duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 21, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "RPC Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Request duration 99th quantile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 77 + }, + "id": 23, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 77 + }, + "id": 24, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 77 + }, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\",job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9", + "version": 0 + } +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-pod.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-pod.yaml new file mode 100644 index 0000000000..f5c72844fb --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-pod.yaml @@ -0,0 +1,1464 @@ +{{- /* +Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + namespace-by-pod.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": 
null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + 
"valueName": "current" + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": "Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "pod", + "value": "pod" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + 
"unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?orgId=1&refresh=30s&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": 
"sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + 
"minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": 
"A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + 
"format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": 
null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + 
+ ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + 
"selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-workload.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-workload.yaml new file mode 100644 index 0000000000..801b09c265 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/namespace-by-workload.yaml @@ -0,0 +1,1736 @@ +{{- /* +Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + namespace-by-workload.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": true, + 
"dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} workload {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 
0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} workload {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ + { + "text": "Time", + "value": "Time" + }, + { + "text": "Value #A", + "value": "Value #A" + }, + { + "text": "Value #B", + "value": "Value #B" + }, + { + "text": "Value #C", + "value": "Value #C" + }, + { + "text": 
"Value #D", + "value": "Value #D" + }, + { + "text": "Value #E", + "value": "Value #E" + }, + { + "text": "Value #F", + "value": "Value #F" + }, + { + "text": "Value #G", + "value": "Value #G" + }, + { + "text": "Value #H", + "value": "Value #H" + }, + { + "text": "workload", + "value": "workload" + } + ], + "datasource": "$datasource", + "fill": 1, + "fontSize": "90%", + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null as zero", + "renderer": "flot", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 24, + "styles": [ + { + "alias": "Time", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Time", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Current Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Current Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Received", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Average Bandwidth Transmitted", + "colorMode": null, + "colors": [ + + ], + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Rate of Received Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Received Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Rate of Transmitted Packets Dropped", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "pps" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?orgId=1&refresh=30s&var-namespace=$namespace&var-type=$type&var-workload=$__cell", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* 
on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Status", + "type": "table" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 6, + "panels": [ + { + "aliasColors": { + + }, + "bars": 
true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} workload {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + 
"fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} workload {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 9, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth HIstory", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 15, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, 
+ "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 17, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}workload{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": 
"10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "hide": 0, + "includeAll": false, + 
"label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Networking / 
Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml new file mode 100644 index 0000000000..9869a3d3e0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml @@ -0,0 +1,1063 @@ +{{- /* +Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana 
+{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + node-cluster-rsrc-use.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}} instance {{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1 per CPU)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + 
"fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Major Page Faults)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": 
"instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Transmit", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} Transmit", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Saturation (Drops Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / 
scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk 
IO Saturation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_time_seconds, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Node Exporter / USE Method / Cluster", + "version": 0 + } +{{- end 
}} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-rsrc-use.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-rsrc-use.yaml new file mode 100644 index 0000000000..75e69afa22 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/node-rsrc-use.yaml @@ -0,0 +1,1089 @@ +{{- /* +Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + node-rsrc-use.json: |- + { + "__inputs": [ + + ], + "__requires": [ + 
+ ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": 
false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1 per CPU)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Major page Faults", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Major Page 
Faults)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "rds", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/Receive/", + "stack": "A" + }, + { + "alias": "/Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "refId": "B" + } + ], + "thresholds": 
[ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Utilisation (Bytes Receive/Transmit)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ Receive/", + "stack": "A" + }, + { + "alias": "/ Transmit/", + "stack": "B", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + }, + { + "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Transmit", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Saturation (Drops Receive/Transmit)", + 
"tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk IO", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk Space", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + 
"label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_time_seconds, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Node Exporter / USE Method / Node", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes-darwin.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes-darwin.yaml new file mode 100644 index 0000000000..fe11875324 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes-darwin.yaml @@ -0,0 +1,1073 @@ +{{- /* 
+Generated from 'nodes-darwin' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.darwin.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-darwin" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + nodes-darwin.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, 
+ "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{`{{`}}cpu{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": 
"null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "1m load average", + "refId": "A" + }, + { + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, 
+ "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Physical Memory", + "refId": "A" + }, + { + "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Memory Used", + "refId": "B" + }, + { + "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "App Memory", + "refId": "C" + }, + { + "expr": "node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Wired Memory", + "refId": "D" + }, + { + "expr": "node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Compressed", + "refId": "E" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, 
+ "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + + }, + "id": 5, + "span": 3, + "targets": [ + { + "expr": "(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Memory Usage", + "transparent": false, + "type": "gauge" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}} io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", 
+ "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Mounted on" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "custom.width", + "value": 93 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Available" + }, + "properties": [ + { + "id": "custom.width", + "value": 88 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used, %" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + + }, + "id": 7, + "span": 6, + "targets": [ + { + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + }, + { + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Disk Space Usage", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value #A": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "mountpoint": { + "aggregations": [ + + ], + "operation": "groupby" + } + } + } + }, + { + "id": "merge", + "options": { + + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used", + "binary": { + "left": 
"Value #A (lastNotNull)", + "operator": "-", + "reducer": "sum", + "right": "Value #B (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used, %", + "binary": { + "left": "Used", + "operator": "/", + "reducer": "sum", + "right": "Value #A (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + + }, + "indexByName": { + + }, + "renameByName": { + "Value #A (lastNotNull)": "Size", + "Value #B (lastNotNull)": "Available", + "mountpoint": "Mounted on" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { + + }, + "sort": [ + { + "field": "Mounted on" + } + ] + } + } + ], + "transparent": false, + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network received (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": 
"{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network transmitted (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": 
true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Node Exporter / MacOS", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes.yaml new file mode 100644 index 0000000000..0da40a7b99 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/nodes.yaml @@ -0,0 +1,1066 @@ +{{- /* +Generated from 'nodes' from 
https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.linux.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + nodes.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 
2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{`{{`}}cpu{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "1m load average", + "refId": "A" + }, + { + "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5m load average", + "refId": "B" + }, + { + "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "15m load average", + "refId": "C" + }, + { + "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "logical cores", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": 
false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory used", + "refId": "A" + }, + { + "expr": "node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory buffers", + "refId": "B" + }, + { + "expr": "node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory cached", + "refId": "C" + }, + { + "expr": "node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "memory free", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + 
"defaults": { + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + + }, + "id": 5, + "span": 3, + "targets": [ + { + "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Memory Usage", + "transparent": false, + "type": "gauge" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/ read| written/", + "yaxis": 1 + }, + { + "alias": "/ io time/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": 
"{{`{{`}}device{{`}}`}} read", + "refId": "A" + }, + { + "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}} written", + "refId": "B" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}} io time", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Mounted on" + }, + "properties": [ + { + "id": "custom.width", + "value": 260 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "custom.width", + "value": 93 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Available" + }, + "properties": [ + { + "id": "custom.width", + "value": 88 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Used, %" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + + }, + "id": 7, + "span": 6, + "targets": [ + { + "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + }, + { + "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "" + } + ], + "title": "Disk Space Usage", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value #A": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #B": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "mountpoint": { + "aggregations": [ + + ], + "operation": "groupby" + } + } + } + }, + { + "id": "merge", + "options": { + + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used", + "binary": { + "left": "Value #A (lastNotNull)", + "operator": "-", + "reducer": "sum", + "right": "Value #B (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Used, %", + "binary": { + "left": "Used", + "operator": "/", + "reducer": "sum", + "right": "Value #A (lastNotNull)" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + + }, + "indexByName": { + + }, + "renameByName": { + "Value #A (lastNotNull)": 
"Size", + "Value #B (lastNotNull)": "Available", + "mountpoint": "Mounted on" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": { + + }, + "sort": [ + { + "field": "Mounted on" + } + ] + } + } + ], + "transparent": false, + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network received (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Received", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + 
}, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network transmitted (bits/s)", + "fill": 0, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}device{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Transmitted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "node-exporter-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + 
"refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Node Exporter / Nodes", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml new file mode 100644 index 0000000000..4d1e33208b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml @@ -0,0 +1,587 @@ +{{- /* +Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Space", + "refId": "A" + }, + { + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free Space", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used inodes", + "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + 
"maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": 
false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/pod-total.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/pod-total.yaml new file mode 100644 index 0000000000..9a7e7d0603 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/pod-total.yaml @@ -0,0 +1,1228 @@ +{{- /* +Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + pod-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + 
"colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "height": 9, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": 
null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 0, + "format": "time_series", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "height": 9, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 12, + "nullPointMode": "connected", + "nullText": null, + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "max": 10000000000, + "min": 0, + "title": "$namespace: $pod", + "unit": "Bps" + }, + "mappings": [ + + ], + "override": { + + }, + "thresholds": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ], + "values": false + } + }, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))", + "format": "time_series", + "instant": null, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "type": "gauge", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" 
+ } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 5, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + 
+ }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 8, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 9, + "legend": { + 
"alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + 
"paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", 
pod=~\"$pod\"}[$interval:$resolution])) by (pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + 
"text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml new file mode 100644 index 0000000000..5c11900e69 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml @@ -0,0 +1,1674 @@ +{{- /* +Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.prometheus.prometheusSpec.remoteWriteDashboards }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + prometheus-remote-write.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "60s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": 
true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Timestamp In vs. Highest Timestamp Sent", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, 
+ "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate[5m]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Timestamps", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate, in vs. succeeded or dropped [5m]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Max Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Min Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + 
{ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Desired Shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shards", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + 
}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Shard Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Pending Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shard Details", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 
6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "TSDB Current Segment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}consumer{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Remote Write Current Segment", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Segments", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Dropped Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + 
"values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Failed Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 15, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Retried Samples", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}}:{{`{{`}}instance{{`}}`}} {{`{{`}}remote_name{{`}}`}}:{{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Enqueue Retries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Misc. 
Rates", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": true, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "value": { + "selected": true, + "text": "All", + "value": "$__all" + } + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "url", + "options": [ + + ], + "query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)", + "refresh": 2, + "regex": "", + "sort": 0, + 
"tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Prometheus / Remote Write", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus.yaml new file mode 100644 index 0000000000..27f7c44e2c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/prometheus.yaml @@ -0,0 +1,1235 @@ +{{- /* +Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + prometheus.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "60s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + 
"steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "s" + }, + { + "alias": "Instance", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "instance", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Job", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "job", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "version", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count by (job, instance, version) (prometheus_build_info{job=~\"$job\", 
instance=~\"$instance\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "max by (job, instance) (time() - process_start_time_seconds{job=~\"$job\", instance=~\"$instance\"})", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Prometheus Stats", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m])) by (scrape_job) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}scrape_job{{`}}`}}", + "legendLink": null, + "step": 
10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Target Sync", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_sd_discovered_targets{job=~\"$job\",instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Targets", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Targets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Discovery", + "titleSize": "h6" 
+ }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_target_interval_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}interval{{`}}`}} configured", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Scrape Interval Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "exceeded body size limit: {{`{{`}}job{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "exceeded sample limit: {{`{{`}}job{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "duplicate timestamp: {{`{{`}}job{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out of bounds: {{`{{`}}job{{`}}`}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out of order: {{`{{`}}job{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scrape failures", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}job{{`}}`}} {{`{{`}}instance{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Appended Samples", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Retrieval", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + 
"spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}job{{`}}`}} {{`{{`}}instance{{`}}`}} head series", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Head Series", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}job{{`}}`}} {{`{{`}}instance{{`}}`}} head chunks", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Head Chunks", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + 
] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_engine_query_duration_seconds_count{job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}job{{`}}`}} {{`{{`}}instance{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Query Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { 
+ "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",job=~\"$job\",instance=~\"$instance\"}) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}slice{{`}}`}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Stage Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ + + ], + "query": 
"label_values(prometheus_build_info, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "instance", + "multi": true, + "name": "instance", + "options": [ + + ], + "query": "label_values(prometheus_build_info{job=~\"$job\"}, instance)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Prometheus / Overview", + "uid": "", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/proxy.yaml new file mode 100644 index 0000000000..410812451e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/proxy.yaml @@ -0,0 +1,1276 @@ +{{- /* +Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeProxy.enabled" .)}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + proxy.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + "rightSide": true + }, 
+ "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + {{- if .Values.k3sServer.enabled }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\"})", + {{- end }} + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rules Sync Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rule Sync Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rate", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Programming Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Programming Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 
1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": 
"flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": 
false, + "targets": [ + { + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 11, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\",instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 12, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Proxy", + "uid": "632e265de029684c40b21cb76bca4f94", + "version": 0 + } +{{- end }}{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/scheduler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/scheduler.yaml new file mode 100644 index 0000000000..ee0cf08b2f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/scheduler.yaml @@ -0,0 +1,1118 @@ +{{- /* +Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeScheduler.enabled" .)}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + scheduler.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": "1m", + "legend": { + "alignAsTable": true, + 
"rightSide": true + }, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + {{- if .Values.k3sServer.enabled }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} + "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"})", + {{- end }} + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Up", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 3, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e", + "refId": "A" + }, + { + "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding", + "refId": "B" + }, + { + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 4, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} e2e", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} binding", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} scheduling algorithm", + "refId": "C" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}cluster{{`}}`}} {{`{{`}}instance{{`}}`}} volume", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Scheduling latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 5, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "2xx", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "3xx", + "refId": "B" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "4xx", + "refId": "C" + }, + { + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "5xx", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Kube API Request Rate", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 6, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Post Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 7, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Get Request Latency 99th Quantile", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 8, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 9, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + + }, + "id": 10, + "interval": "1m", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{`}}instance{{`}}`}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "instance", + "options": [ + + ], + "query": "label_values(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Scheduler", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", + "version": 0 + } +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/workload-total.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/workload-total.yaml new file mode 100644 index 0000000000..5aafccdebe --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/dashboards-1.14/workload-total.yaml @@ -0,0 +1,1438 @@ +{{- /* +Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: {{ ternary $.Values.grafana.sidecar.dashboards.labelValue "1" (not (empty $.Values.grafana.sidecar.dashboards.labelValue)) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + workload-total.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Current Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + 
"dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} pod {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": 
false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} pod {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Current Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + 
"x": 0, + "y": 10 + }, + "id": 5, + "panels": [ + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} pod {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Received", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [ + + ], + "minSpan": 24, + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 24, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}} pod {{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Average Rate of Bytes Transmitted", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Average Bandwidth", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 8, + "panels": [ + + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Bandwidth History", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + 
"paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": 
"connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Packets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": true, + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 14, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 15, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + 
"hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Received Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": 
false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "minSpan": 12, + "nullPointMode": "connected", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{`{{`}}pod{{`}}`}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of Transmitted Packets Dropped", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Errors", + "titleSize": "h6", + "type": "row" + } + ], + "refresh": "10s", + "rows": [ + + ], + "schemaVersion": 18, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": 
"Data Source", + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "$datasource", + "definition": "label_values(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(container_network_receive_packets_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "", + "value": "" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "deployment", + "value": "deployment" + }, + "datasource": "$datasource", + "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, 
+ "name": "resolution", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "30s,5m,1h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "4h", + "value": "4h" + } + ], + "query": "4h", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "interval", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "{{ .Values.grafana.defaultDashboardsTimezone }}", + "title": "Kubernetes / Networking / Workload", + "uid": "728bf77cc1166d2f3133bf25846876cc", + "version": 0 + } +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/namespaces.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/namespaces.yaml new file mode 100644 index 0000000000..39ed210ed4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/grafana/namespaces.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled (not .Values.grafana.defaultDashboards.useExistingNamespace) }} +apiVersion: v1 +kind: Namespace 
+metadata: + name: {{ .Values.grafana.defaultDashboards.namespace }} + labels: + name: {{ .Values.grafana.defaultDashboards.namespace }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if not .Values.grafana.defaultDashboards.cleanupOnUninstall }}{{/* emit the annotations key only when there is an annotation to set, so it is never rendered as a bare null */}} + annotations: + helm.sh/resource-policy: "keep" +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/_prometheus-operator.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/_prometheus-operator.tpl new file mode 100644 index 0000000000..6ae9dc72e6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/_prometheus-operator.tpl @@ -0,0 +1,7 @@ +{{/* Generate basic labels for prometheus-operator: the common "kube-prometheus-stack.labels" set plus the operator-specific app, app.kubernetes.io/name and app.kubernetes.io/component labels. */}} +{{- define "kube-prometheus-stack.prometheus-operator.labels" }} +{{- include "kube-prometheus-stack.labels" . }} +app: {{ template "kube-prometheus-stack.name" . }}-operator +app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus-operator +app.kubernetes.io/component: prometheus-operator +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl new file mode 100644 index 0000000000..f419caf54b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl @@ -0,0 +1,6 @@ +{{/* Generate basic labels for prometheus-operator-webhook: the common "kube-prometheus-stack.labels" set plus the app.kubernetes.io/name and app.kubernetes.io/component labels. */}} +{{- define "kube-prometheus-stack.prometheus-operator-webhook.labels" }} +{{- include "kube-prometheus-stack.labels" . }} +app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . 
}}-prometheus-operator +app.kubernetes.io/component: prometheus-operator-webhook +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml new file mode 100644 index 0000000000..054eac4a77 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml @@ -0,0 +1,143 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.labels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ .Values.prometheusOperator.admissionWebhooks.deployment.replicas }} + revisionHistoryLimit: {{ .Values.prometheusOperator.admissionWebhooks.deployment.revisionHistoryLimit }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.strategy }} + strategy: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + release: {{ $.Release.Name | quote }} + template: + metadata: + labels: + app: {{ template "kube-prometheus-stack.name" . 
}}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 8 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.podLabels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.podLabels | indent 8 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.podAnnotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.deployment.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: prometheus-operator-admission-webhook + {{- $operatorRegistry := .Values.global.imageRegistry | default .Values.prometheusOperator.admissionWebhooks.deployment.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.image.sha }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.sha }}" + {{- else }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: "{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.pullPolicy }}" + args: + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.logFormat }} + - --log-format={{ .Values.prometheusOperator.admissionWebhooks.deployment.logFormat }} + {{- end }} + {{- if 
.Values.prometheusOperator.admissionWebhooks.deployment.logLevel }} + - --log-level={{ .Values.prometheusOperator.admissionWebhooks.deployment.logLevel }} + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - "--web.enable-tls=true" + - "--web.cert-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.crt{{ else }}cert{{ end }}" + - "--web.key-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.key{{ else }}key{{ end }}" + - "--web.listen-address=:{{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.internalPort }}" + - "--web.tls-min-version={{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.tlsMinVersion }}" + ports: + - containerPort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.internalPort }} + name: https + {{- else }} + ports: + - containerPort: 8080 + name: http + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: /healthz + port: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.failureThreshold }} + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.enabled }} + livenessProbe: + httpGet: + 
path: /healthz + port: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.failureThreshold }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.resources | indent 12 }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.containerSecurityContext | indent 12 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + volumeMounts: + - name: tls-secret + mountPath: /cert + readOnly: true + volumes: + - name: tls-secret + secret: + defaultMode: 420 + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.dnsConfig }} + dnsConfig: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.securityContext | indent 8 }} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.operator.serviceAccountName" . 
}}-webhook + automountServiceAccountToken: {{ .Values.prometheusOperator.admissionWebhooks.deployment.automountServiceAccountToken }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml new file mode 100644 index 0000000000..52dd78f624 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml @@ -0,0 +1,15 @@ +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.podDisruptionBudget -}} +apiVersion: policy/v1{{ ternary "" "beta1" ($.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget") }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-operator-webhook + release: {{ $.Release.Name | quote }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.podDisruptionBudget | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/service.yaml new file mode 100644 index 0000000000..b06c129123 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/service.yaml @@ -0,0 +1,58 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . 
| nindent 4 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.labels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.clusterIP }} + clusterIP: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.clusterIP }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheusOperator.admissionWebhooks.deployment.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- if not .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - name: http + {{- if eq .Values.prometheusOperator.admissionWebhooks.deployment.service.type "NodePort" }} + nodePort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.nodePort }} + {{- end }} + port: 8080 + targetPort: http + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - name: https + {{- if 
eq .Values.prometheusOperator.admissionWebhooks.deployment.service.type "NodePort"}} + nodePort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.nodePortTls }} + {{- end }} + port: 443 + targetPort: https + {{- end }} + selector: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + release: {{ $.Release.Name | quote }} + type: "{{ .Values.prometheusOperator.admissionWebhooks.deployment.service.type }}" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml new file mode 100644 index 0000000000..55511da36b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: {{ .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.automountServiceAccountToken }} +metadata: + name: {{ template "kube-prometheus-stack.operator.admissionWebhooks.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | indent 4 }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml new file mode 100644 index 0000000000..f7543b0f1a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml @@ -0,0 +1,36 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + helm.sh/hook: pre-install,pre-upgrade + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + ## Ensure this is run before the job + helm.sh/hook-weight: "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.annotations }} + {{ toYaml . 
| nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + endpointSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml new file mode 100644 index 0000000000..4e3b0d9225 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml @@ -0,0 +1,36 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + ## Ensure this is run before the job + helm.sh/hook-weight: "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + endpointSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml new file mode 100644 index 0000000000..b81257c168 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template 
"kube-prometheus-stack.fullname" . }}-admission + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +rules: + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + - mutatingwebhookconfigurations + verbs: + - get + - update +{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") (or .Values.global.cattle.psp.enabled .Values.global.rbac.pspEnabled) }} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} + - apiGroups: ['policy'] +{{- else }} + - apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml new file mode 100644 index 0000000000..4cf1335b22 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-admission +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml new file mode 100644 index 0000000000..baed83db48 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml @@ -0,0 +1,73 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +{{- with .Values.prometheusOperator.admissionWebhooks.annotations }} +{{ toYaml . 
| indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + {{- if .Capabilities.APIVersions.Has "batch/v1alpha1" }} + # Alpha feature since k8s 1.12 + ttlSecondsAfterFinished: 0 + {{- end }} + template: + metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create +{{- with .Values.prometheusOperator.admissionWebhooks.patch.podAnnotations }} + annotations: +{{ toYaml . | indent 8 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 8 }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + {{- end }} + containers: + - name: create + {{- $registry := include "monitoring_registry" . | default .Values.prometheusOperator.admissionWebhooks.patch.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + {{- else }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }} + {{- end }} + imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} + args: + - create + - --host={{- include "kube-prometheus-stack.operator.admission-webhook.dnsNames" . | replace "\n" "," }} + - --namespace={{ template "kube-prometheus-stack.namespace" . }} + - --secret-name={{ template "kube-prometheus-stack.fullname" . 
}}-admission + {{- with .Values.prometheusOperator.admissionWebhooks.createSecretJob }} + securityContext: + {{ toYaml .securityContext | nindent 12 }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} + restartPolicy: OnFailure + serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.nodeSelector }} +{{ toYaml . | indent 8 }} +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml new file mode 100644 index 0000000000..5639cc9e80 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml @@ -0,0 +1,74 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +{{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} +{{ toYaml . | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + {{- if .Capabilities.APIVersions.Has "batch/v1alpha1" }} + # Alpha feature since k8s 1.12 + ttlSecondsAfterFinished: 0 + {{- end }} + template: + metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch +{{- with .Values.prometheusOperator.admissionWebhooks.patch.podAnnotations }} + annotations: +{{ toYaml . | indent 8 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 8 }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + {{- end }} + containers: + - name: patch + {{- $registry := include "monitoring_registry" . | default .Values.prometheusOperator.admissionWebhooks.patch.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + {{- else }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }} + {{- end }} + imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} + args: + - patch + - --webhook-name={{ template "kube-prometheus-stack.fullname" . 
}}-admission + - --namespace={{ template "kube-prometheus-stack.namespace" . }} + - --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission + - --patch-failure-policy={{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- with .Values.prometheusOperator.admissionWebhooks.patchWebhookJob }} + securityContext: + {{ toYaml .securityContext | nindent 12 }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} + restartPolicy: OnFailure + serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.nodeSelector }} +{{ toYaml . | indent 8 }} +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} +{{ toYaml . 
| indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml new file mode 100644 index 0000000000..864deb52a0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + ## Ensure this is run before the job + "helm.sh/hook-weight": "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.annotations }} + {{ toYaml . 
| nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + - {} + policyTypes: + - Egress +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml new file mode 100644 index 0000000000..076c467004 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + ## Ensure this is run before the job + "helm.sh/hook-weight": "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + - {} + policyTypes: + - Egress +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml new file mode 100644 index 0000000000..0113b6a5d8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/psp.yaml @@ -0,0 +1,47 @@ +{{- if and (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +{{- if .Values.global.rbac.pspAnnotations }} +{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml new file mode 100644 index 0000000000..f15abf4395 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - create +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml new file mode 100644 index 0000000000..30bde920b6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "kube-prometheus-stack.fullname" . }}-admission +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml new file mode 100644 index 0000000000..02594547d1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml new file mode 100644 index 0000000000..da01f3b57e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml @@ -0,0 +1,77 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }} +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) 
| quote }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +webhooks: + - name: prometheusrulemutate.monitoring.coreos.com + {{- if eq .Values.prometheusOperator.admissionWebhooks.failurePolicy "IgnoreOnInstallOnly" }} + failurePolicy: {{ .Release.IsInstall | ternary "Ignore" "Fail" }} + {{- else if .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + failurePolicy: {{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- else if .Values.prometheusOperator.admissionWebhooks.patch.enabled }} + failurePolicy: Ignore + {{- else }} + failurePolicy: Fail + {{- end }} + rules: + - apiGroups: + - monitoring.coreos.com + apiVersions: + - "*" + resources: + - prometheusrules + operations: + - CREATE + - UPDATE + clientConfig: + service: + namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.operator.fullname" $ }}{{ if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }}-webhook{{ end }} + path: /admission-prometheusrules/mutate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.timeoutSeconds }} + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector }} + namespaceSelector: + {{- with (omit .Values.prometheusOperator.admissionWebhooks.namespaceSelector "matchExpressions") }} + {{- toYaml . 
| nindent 6 }} + {{- end }} + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions }} + matchExpressions: + {{- with (.Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions) }} + {{- toYaml . | nindent 8 }}{{/* nindent 8, as in validatingWebhookConfiguration.yaml, so user-supplied expressions stay in the same list as the entries appended below */}} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- range $namespace := mustUniq .Values.prometheusOperator.denyNamespaces }} + - {{ $namespace }} + {{- end }} + {{- else if and .Values.prometheusOperator.namespaces .Values.prometheusOperator.namespaces.additional }} + - key: kubernetes.io/metadata.name + operator: In + values: + {{- if and .Values.prometheusOperator.namespaces.releaseNamespace (default .Values.prometheusOperator.namespaces.releaseNamespace true) }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} + - {{ $namespace }} + {{- end }} + {{- range $namespace := mustUniq .Values.prometheusOperator.namespaces.additional }} + - {{ $namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml new file mode 100644 index 0000000000..4827871cca --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml @@ -0,0 +1,77 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-admission +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} + annotations: + certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) | quote }} + cert-manager.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) | quote }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +webhooks: + - name: prometheusrulemutate.monitoring.coreos.com + {{- if eq .Values.prometheusOperator.admissionWebhooks.failurePolicy "IgnoreOnInstallOnly" }} + failurePolicy: {{ .Release.IsInstall | ternary "Ignore" "Fail" }} + {{- else if .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + failurePolicy: {{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- else if .Values.prometheusOperator.admissionWebhooks.patch.enabled }} + failurePolicy: Ignore + {{- else }} + failurePolicy: Fail + {{- end }} + rules: + - apiGroups: + - monitoring.coreos.com + apiVersions: + - "*" + resources: + - prometheusrules + operations: + - CREATE + - UPDATE + clientConfig: + service: + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + name: {{ template "kube-prometheus-stack.operator.fullname" $ }}{{ if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }}-webhook{{ end }} + path: /admission-prometheusrules/validate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.timeoutSeconds }} + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector }} + namespaceSelector: + {{- with (omit .Values.prometheusOperator.admissionWebhooks.namespaceSelector "matchExpressions") }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions }} + matchExpressions: + {{- with (.Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions) }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- range $namespace := mustUniq .Values.prometheusOperator.denyNamespaces }} + - {{ $namespace }} + {{- end }} + {{- else if and .Values.prometheusOperator.namespaces .Values.prometheusOperator.namespaces.additional }} + - key: kubernetes.io/metadata.name + operator: In + values: + {{- if and .Values.prometheusOperator.namespaces.releaseNamespace (default .Values.prometheusOperator.namespaces.releaseNamespace true) }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) 
}} + - {{ $namespace }} + {{- end }} + {{- range $namespace := mustUniq .Values.prometheusOperator.namespaces.additional }} + - {{ $namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/certmanager.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/certmanager.yaml new file mode 100644 index 0000000000..cb27e49f48 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/certmanager.yaml @@ -0,0 +1,55 @@ +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled -}} +{{- if not .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef -}} +# Create a selfsigned Issuer, in order to create a root CA certificate for +# signing webhook serving certificates +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + selfSigned: {} +--- +# Generate a CA Certificate used to sign certificates for the webhook +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-cert + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert + duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.rootCert.duration | default "43800h0m0s" | quote }} + issuerRef: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + commonName: "ca.webhook.kube-prometheus-stack" + isCA: true +--- +# Create an Issuer that uses the above generated CA certificate to issue certs +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +spec: + ca: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert +{{- end }} +--- +# generate a server certificate for the apiservices to use +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission + duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.admissionCert.duration | default "8760h0m0s" | quote }} + issuerRef: + {{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }} + {{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }} + {{- else }} + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + {{- end }} + dnsNames: + {{- include "kube-prometheus-stack.operator.admission-webhook.dnsNames" . | splitList "\n" | toYaml | nindent 4 }} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/ciliumnetworkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/ciliumnetworkpolicy.yaml new file mode 100644 index 0000000000..07e2e99967 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/ciliumnetworkpolicy.yaml @@ -0,0 +1,40 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + endpointSelector: + matchLabels: + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + app: {{ template "kube-prometheus-stack.name" . 
}}-operator + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} + ingress: + - toPorts: + - ports: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: {{ .Values.prometheusOperator.tls.internalPort | quote }} + {{- else }} + - port: "8080" + {{- end }} + protocol: "TCP" + {{- if not .Values.prometheusOperator.tls.enabled }} + rules: + http: + - method: "GET" + path: "/metrics" + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrole.yaml new file mode 100644 index 0000000000..fd11b69eed --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrole.yaml @@ -0,0 +1,109 @@ +{{- if and .Values.prometheusOperator.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - alertmanagers/finalizers + - alertmanagers/status + - alertmanagerconfigs + - prometheuses + - prometheuses/finalizers + - prometheuses/status + - prometheusagents + - prometheusagents/finalizers + - prometheusagents/status + - thanosrulers + - thanosrulers/finalizers + - thanosrulers/status + - scrapeconfigs + - servicemonitors + - podmonitors + - probes + - prometheusrules + verbs: + - '*' +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - '*' +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - delete +- apiGroups: + - "" + resources: + - services + - services/finalizers + - endpoints + verbs: + - get + - create + - update + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - patch + - create +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get +{{- if .Capabilities.APIVersions.Has "discovery.k8s.io/v1/EndpointSlice" }} +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list + - watch +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrolebinding.yaml new file mode 100644 index 0000000000..ad9e3ef6c5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheusOperator.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.operator.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/deployment.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/deployment.yaml new file mode 100644 index 0000000000..8a01b2912a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/deployment.yaml @@ -0,0 +1,204 @@ +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +{{- $defaultKubeletSvcName := printf "%s-kubelet" (include "kube-prometheus-stack.fullname" .) }} +{{- if .Values.prometheusOperator.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.labels }} +{{ toYaml .Values.prometheusOperator.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.annotations | indent 4 }} +{{- end }} +spec: + replicas: 1 + revisionHistoryLimit: {{ .Values.prometheusOperator.revisionHistoryLimit }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + {{- with .Values.prometheusOperator.strategy }} + strategy: + {{- toYaml . 
| nindent 4 }} + {{- end }} + template: + metadata: + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 8 }} +{{- if .Values.prometheusOperator.podLabels }} +{{ toYaml .Values.prometheusOperator.podLabels | indent 8 }} +{{- end }} +{{- if .Values.prometheusOperator.podAnnotations }} + annotations: +{{ toYaml .Values.prometheusOperator.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.prometheusOperator.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: {{ template "kube-prometheus-stack.name" . }} + {{- $base_registry := (include "monitoring_registry" .) }} + {{- $configReloaderRegistry := $base_registry | default .Values.prometheusOperator.prometheusConfigReloader.image.registry -}} + {{- $operatorRegistry := $base_registry | default .Values.prometheusOperator.image.registry -}} + {{- $thanosRegistry := $base_registry | default .Values.prometheusOperator.thanosImage.registry -}} + {{- if .Values.prometheusOperator.image.sha }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.image.repository }}:{{ .Values.prometheusOperator.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.image.sha }}" + {{- else }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.image.repository }}:{{ .Values.prometheusOperator.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: "{{ .Values.prometheusOperator.image.pullPolicy }}" + args: + {{- if .Values.prometheusOperator.kubeletService.enabled }} + - --kubelet-service={{ .Values.prometheusOperator.kubeletService.namespace }}/{{ default $defaultKubeletSvcName .Values.prometheusOperator.kubeletService.name }} + {{- end }} + {{- if .Values.prometheusOperator.logFormat }} + - --log-format={{ 
.Values.prometheusOperator.logFormat }} + {{- end }} + {{- if .Values.prometheusOperator.logLevel }} + - --log-level={{ .Values.prometheusOperator.logLevel }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - --deny-namespaces={{ tpl (.Values.prometheusOperator.denyNamespaces | join ",") $ }} + {{- end }} + {{- with $.Values.prometheusOperator.namespaces }} + {{- $namespaces := list }} + {{- if .releaseNamespace }} + {{- $namespaces = append $namespaces $namespace }} + {{- end }} + {{- if .additional }} + {{- range $ns := .additional }} + {{- $namespaces = append $namespaces (tpl $ns $) }} + {{- end }} + {{- end }} + - --namespaces={{ $namespaces | mustUniq | join "," }} + {{- end }} + - --localhost=127.0.0.1 + {{- if .Values.prometheusOperator.prometheusDefaultBaseImage }} + - --prometheus-default-base-image={{ $base_registry | default .Values.prometheusOperator.prometheusDefaultBaseImageRegistry }}/{{ .Values.prometheusOperator.prometheusDefaultBaseImage }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + - --alertmanager-default-base-image={{ $base_registry | default .Values.prometheusOperator.alertmanagerDefaultBaseImageRegistry }}/{{ .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusConfigReloader.image.sha }} + - --prometheus-config-reloader={{ $configReloaderRegistry }}/{{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.prometheusConfigReloader.image.sha }} + {{- else }} + - --prometheus-config-reloader={{ $configReloaderRegistry }}/{{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag | default .Chart.AppVersion }} + {{- end }} + - --config-reloader-cpu-request={{ 
(((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).cpu) | default 0 }} + - --config-reloader-cpu-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).cpu) | default 0 }} + - --config-reloader-memory-request={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).memory) | default 0 }} + - --config-reloader-memory-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).memory) | default 0 }} + {{- if .Values.prometheusOperator.prometheusConfigReloader.enableProbe }} + - --enable-config-reloader-probes=true + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerInstanceNamespaces }} + - --alertmanager-instance-namespaces={{ .Values.prometheusOperator.alertmanagerInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerInstanceSelector }} + - --alertmanager-instance-selector={{ .Values.prometheusOperator.alertmanagerInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerConfigNamespaces }} + - --alertmanager-config-namespaces={{ .Values.prometheusOperator.alertmanagerConfigNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusInstanceNamespaces }} + - --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusInstanceSelector }} + - --prometheus-instance-selector={{ .Values.prometheusOperator.prometheusInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.thanosImage.sha }} + - --thanos-default-base-image={{ $thanosRegistry }}/{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }} + {{- else }} + - --thanos-default-base-image={{ $thanosRegistry }}/{{ .Values.prometheusOperator.thanosImage.repository }}:{{ 
.Values.prometheusOperator.thanosImage.tag }} + {{- end }} + {{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }} + - --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.thanosRulerInstanceSelector }} + - --thanos-ruler-instance-selector={{ .Values.prometheusOperator.thanosRulerInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.secretFieldSelector }} + - --secret-field-selector={{ tpl (.Values.prometheusOperator.secretFieldSelector) $ }} + {{- end }} + {{- if .Values.prometheusOperator.clusterDomain }} + - --cluster-domain={{ .Values.prometheusOperator.clusterDomain }} + {{- end }} + {{- if .Values.prometheusOperator.tls.enabled }} + - --web.enable-tls=true + - --web.cert-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.crt{{ else }}cert{{ end }} + - --web.key-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.key{{ else }}key{{ end }} + - --web.listen-address=:{{ .Values.prometheusOperator.tls.internalPort }} + - --web.tls-min-version={{ .Values.prometheusOperator.tls.tlsMinVersion }} + ports: + - containerPort: {{ .Values.prometheusOperator.tls.internalPort }} + name: https + {{- else }} + ports: + - containerPort: 8080 + name: http + {{- end }} + env: + {{- range $key, $value := .Values.prometheusOperator.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.resources | indent 12 }} + securityContext: +{{ toYaml .Values.prometheusOperator.containerSecurityContext | indent 12 }} + volumeMounts: + {{- if .Values.prometheusOperator.tls.enabled }} + - name: tls-secret + mountPath: /cert + readOnly: true + {{- end }} + {{- with .Values.prometheusOperator.extraVolumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumes: + {{- if .Values.prometheusOperator.tls.enabled }} + - name: tls-secret + secret: + defaultMode: 420 + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission + {{- end }} + {{- with .Values.prometheusOperator.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.dnsConfig }} + dnsConfig: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.securityContext | indent 8 }} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.prometheusOperator.automountServiceAccountToken }} +{{- if .Values.prometheusOperator.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet +{{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} +{{- with .Values.prometheusOperator.nodeSelector }} +{{ toYaml . | indent 8 }} +{{- end }} + {{- with .Values.prometheusOperator.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- with .Values.prometheusOperator.tolerations }} +{{ toYaml . | indent 8 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/networkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/networkpolicy.yaml new file mode 100644 index 0000000000..cfd5b0b8c7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/networkpolicy.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +apiVersion: {{ template "kube-prometheus-stack.prometheus.networkPolicy.apiVersion" . 
}} +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + egress: + - {} + ingress: + - ports: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: {{ .Values.prometheusOperator.tls.internalPort }} + {{- else }} + - port: 8080 + {{- end }} + policyTypes: + - Egress + - Ingress + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrole.yaml new file mode 100644 index 0000000000..61bc3d9040 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrole.yaml @@ -0,0 +1,21 @@ +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +{{- if and .Values.prometheusOperator.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-psp + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . 
| nindent 4 }} +rules: +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} +- apiGroups: ['policy'] +{{- else }} +- apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-prometheus-stack.operator.fullname" . }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrolebinding.yaml new file mode 100644 index 0000000000..40e0fc5c15 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp-clusterrolebinding.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.prometheusOperator.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-psp + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-psp +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp.yaml new file mode 100644 index 0000000000..28a9075d3e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/psp.yaml @@ -0,0 +1,46 @@ +{{- if and .Values.prometheusOperator.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- if .Values.global.rbac.pspAnnotations }} + annotations: +{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }} +{{- end }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: {{ .Values.prometheusOperator.hostNetwork }} + hostIPC: false + hostPID: false + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. 
+ - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/service.yaml new file mode 100644 index 0000000000..d45ab22d08 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/service.yaml @@ -0,0 +1,57 @@ +{{- if .Values.prometheusOperator.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.service.labels }} +{{ toYaml .Values.prometheusOperator.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.service.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheusOperator.service.clusterIP }} + clusterIP: {{ .Values.prometheusOperator.service.clusterIP }} +{{- end }} +{{- if .Values.prometheusOperator.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheusOperator.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheusOperator.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheusOperator.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheusOperator.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheusOperator.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheusOperator.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- if not .Values.prometheusOperator.tls.enabled }} + - name: http + {{- if eq 
.Values.prometheusOperator.service.type "NodePort" }} + nodePort: {{ .Values.prometheusOperator.service.nodePort }} + {{- end }} + port: 8080 + targetPort: http + {{- end }} + {{- if .Values.prometheusOperator.tls.enabled }} + - name: https + {{- if eq .Values.prometheusOperator.service.type "NodePort"}} + nodePort: {{ .Values.prometheusOperator.service.nodePortTls }} + {{- end }} + port: 443 + targetPort: https + {{- end }} + selector: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + type: "{{ .Values.prometheusOperator.service.type }}" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/serviceaccount.yaml new file mode 100644 index 0000000000..4f84974f9b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +automountServiceAccountToken: {{ .Values.prometheusOperator.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/servicemonitor.yaml new file mode 100644 index 0000000000..cbe79e1253 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/servicemonitor.yaml @@ -0,0 +1,57 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- with .Values.prometheusOperator.serviceMonitor.additionalLabels }} +{{ toYaml . | indent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheusOperator.serviceMonitor | nindent 2 }} + endpoints: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: https + scheme: https + tlsConfig: + serverName: {{ template "kube-prometheus-stack.operator.fullname" . }} + ca: + secret: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + key: {{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}ca.crt{{ else }}ca{{ end }} + optional: false + {{- else }} + - port: http + {{- end }} + honorLabels: true + {{- if .Values.prometheusOperator.serviceMonitor.interval }} + interval: {{ .Values.prometheusOperator.serviceMonitor.interval }} + {{- end }} + metricRelabelings: + {{- if .Values.prometheusOperator.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.prometheusOperator.serviceMonitor.metricRelabelings | indent 6) . 
}} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} # quoted so an all-digit or boolean-looking ID stays a string + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} # quoted so an all-digit or boolean-looking name stays a string + {{- end }} +{{- if .Values.prometheusOperator.serviceMonitor.relabelings }} + relabelings: +{{ toYaml .Values.prometheusOperator.serviceMonitor.relabelings | indent 6 }} +{{- end }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/verticalpodautoscaler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/verticalpodautoscaler.yaml new file mode 100644 index 0000000000..f225d16dde --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus-operator/verticalpodautoscaler.yaml @@ -0,0 +1,40 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.prometheusOperator.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + {{- with .Values.prometheusOperator.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: {{ template "kube-prometheus-stack.name" . }} + {{- with .Values.prometheusOperator.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml .
| nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.prometheusOperator.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml .Values.prometheusOperator.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml .Values.prometheusOperator.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + {{- with .Values.prometheusOperator.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/_rules.tpl b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/_rules.tpl new file mode 100644 index 0000000000..4a8213d089 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/_rules.tpl @@ -0,0 +1,44 @@ +{{- /* +Generated file. Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- define "rules.names" }} +rules: + - "alertmanager.rules" + - "config-reloaders" + - "etcd" + - "general.rules" + - "k8s.rules.container-cpu-usage-seconds-total" + - "k8s.rules.container-memory-cache" + - "k8s.rules.container-memory-rss" + - "k8s.rules.container-memory-swap" + - "k8s.rules.container-memory-working-set-bytes" + - "k8s.rules.container-resource" + - "k8s.rules.pod-owner" + - "kube-apiserver-availability.rules" + - "kube-apiserver-burnrate.rules" + - "kube-apiserver-histogram.rules" + - "kube-apiserver-slos" + - "kube-prometheus-general.rules" + - "kube-prometheus-node-recording.rules" + - "kube-scheduler.rules" + - "kube-state-metrics" + - "kubelet.rules" + - "kubernetes-apps" + - "kubernetes-resources" + - "kubernetes-storage" + - "kubernetes-system" + - "kubernetes-system-kube-proxy" + - "kubernetes-system-apiserver" + - "kubernetes-system-kubelet" + - "kubernetes-system-controller-manager" + - "kubernetes-system-scheduler" + - "node-exporter.rules" + - "node-exporter" + - "node.rules" + - "node-network" + - "prometheus-operator" + - "prometheus" + - "windows.node.rules" + - "windows.pod.rules" +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertRelabelConfigs.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertRelabelConfigs.yaml new file mode 100644 index 0000000000..bff930981a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertRelabelConfigs.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-prometheus-am-relabel-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-relabel-confg +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + additional-alert-relabel-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs | b64enc | quote }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertmanagerConfigs.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertmanagerConfigs.yaml new file mode 100644 index 0000000000..2fe8fdb816 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalAlertmanagerConfigs.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-confg +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + additional-alertmanager-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs) . 
| b64enc | quote }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalPrometheusRules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalPrometheusRules.yaml new file mode 100644 index 0000000000..cb4aabaa7b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalPrometheusRules.yaml @@ -0,0 +1,43 @@ +{{- if or .Values.additionalPrometheusRules .Values.additionalPrometheusRulesMap}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-additional-prometheus-rules + namespace: {{ template "kube-prometheus-stack.namespace" . }} +items: +{{- if .Values.additionalPrometheusRulesMap }} +{{- range $prometheusRuleName, $prometheusRule := .Values.additionalPrometheusRulesMap }} + - apiVersion: monitoring.coreos.com/v1 + kind: PrometheusRule + metadata: + name: {{ template "kube-prometheus-stack.name" $ }}-{{ $prometheusRuleName }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }} +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $prometheusRule.additionalLabels }} +{{ toYaml $prometheusRule.additionalLabels | indent 8 }} + {{- end }} + spec: + groups: +{{ toYaml $prometheusRule.groups| indent 8 }} +{{- end }} +{{- else }} +{{- range .Values.additionalPrometheusRules }} + - apiVersion: monitoring.coreos.com/v1 + kind: PrometheusRule + metadata: + name: {{ template "kube-prometheus-stack.name" $ }}-{{ .name }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }} +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if .additionalLabels }} +{{ toYaml .additionalLabels | indent 8 }} + {{- end }} + spec: + groups: +{{ toYaml .groups| indent 8 }} +{{- end }} +{{- end }} +{{- end }} diff --git 
a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalScrapeConfigs.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalScrapeConfigs.yaml new file mode 100644 index 0000000000..ebdf766fde --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/additionalScrapeConfigs.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalScrapeConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-scrape-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-scrape-confg +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +data: +{{- if eq ( typeOf .Values.prometheus.prometheusSpec.additionalScrapeConfigs ) "string" }} + additional-scrape-configs.yaml: {{ tpl .Values.prometheus.prometheusSpec.additionalScrapeConfigs $ | b64enc | quote }} +{{- else }} + additional-scrape-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalScrapeConfigs) $ | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ciliumnetworkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ciliumnetworkpolicy.yaml new file mode 100644 index 0000000000..74d61d7c13 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ciliumnetworkpolicy.yaml @@ -0,0 +1,27 @@ +{{- if and .Values.prometheus.networkPolicy.enabled (eq .Values.prometheus.networkPolicy.flavor "cilium") }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} +spec: + endpointSelector: + {{- if .Values.prometheus.networkPolicy.cilium.endpointSelector }} + {{- toYaml .Values.prometheus.networkPolicy.cilium.endpointSelector | nindent 4 }} + {{- else }} + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [prometheus]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.prometheus.crname" . 
}}]} + {{- end }} + {{- if and .Values.prometheus.networkPolicy.cilium .Values.prometheus.networkPolicy.cilium.egress }} + egress: + {{ toYaml .Values.prometheus.networkPolicy.cilium.egress | nindent 4 }} + {{- end }} + {{- if and .Values.prometheus.networkPolicy.cilium .Values.prometheus.networkPolicy.cilium.ingress }} + ingress: + {{ toYaml .Values.prometheus.networkPolicy.cilium.ingress | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrole.yaml new file mode 100644 index 0000000000..3585b5db11 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrole.yaml @@ -0,0 +1,30 @@ +{{- if and .Values.prometheus.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +rules: +# These permissions are not in the kube-prometheus repo; +# they're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml +- apiGroups: [""] + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: + - "networking.k8s.io" + resources: + - ingresses + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] + verbs: ["get"] +{{- if .Values.prometheus.additionalRulesForClusterRole }} +{{ toYaml .Values.prometheus.additionalRulesForClusterRole | indent 0 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrolebinding.yaml new file mode 100644 index 0000000000..9fc4f65da4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/clusterrolebinding.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.prometheus.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" .
}} +{{- end }} + diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/csi-secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/csi-secret.yaml new file mode 100644 index 0000000000..e05382f633 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/csi-secret.yaml @@ -0,0 +1,12 @@ +{{- if and .Values.prometheus.prometheusSpec.thanos .Values.prometheus.prometheusSpec.thanos.secretProviderClass }} +--- +apiVersion: secrets-store.csi.x-k8s.io/v1alpha1 +kind: SecretProviderClass +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +spec: +{{ toYaml .Values.prometheus.prometheusSpec.thanos.secretProviderClass | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/extrasecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/extrasecret.yaml new file mode 100644 index 0000000000..17f3478a46 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.prometheus.extraSecret.data -}} +{{- $secretName := printf "prometheus-%s-extra" (include "kube-prometheus-stack.fullname" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.prometheus.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.extraSecret.annotations }} + annotations: +{{ toYaml .Values.prometheus.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +data: +{{- range $key, $val := .Values.prometheus.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingress.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingress.yaml new file mode 100644 index 0000000000..d2f6af5dd1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingress.yaml @@ -0,0 +1,77 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.ingress.enabled -}} + {{- $pathType := .Values.prometheus.ingress.pathType | default "ImplementationSpecific" -}} + {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" -}} + {{- $servicePort := .Values.prometheus.ingress.servicePort | default .Values.prometheus.service.port -}} + {{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix -}} + {{- $paths := .Values.prometheus.ingress.paths | default $routePrefix -}} + {{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} + {{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) "true" -}} +apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" . }} +kind: Ingress +metadata: +{{- if .Values.prometheus.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.prometheus.ingress.annotations) . | nindent 4 }} +{{- end }} + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.prometheus.ingress.labels }} +{{ toYaml .Values.prometheus.ingress.labels | indent 4 }} +{{- end }} +spec: + {{- if $apiIsStable }} + {{- if .Values.prometheus.ingress.ingressClassName }} + ingressClassName: {{ .Values.prometheus.ingress.ingressClassName }} + {{- end }} + {{- end }} + rules: + {{- if .Values.prometheus.ingress.hosts }} + {{- range $host := .Values.prometheus.ingress.hosts }} + - host: {{ tpl $host $ | quote }} # quoted: a wildcard host such as *.example.com would otherwise parse as a YAML alias + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- if .Values.prometheus.ingress.tls }} + tls: +{{ tpl (toYaml .Values.prometheus.ingress.tls | indent 4) .
}} + {{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressThanosSidecar.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressThanosSidecar.yaml new file mode 100644 index 0000000000..3f507cfa9f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressThanosSidecar.yaml @@ -0,0 +1,77 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosIngress.enabled }} +{{- $pathType := .Values.prometheus.thanosIngress.pathType | default "" }} +{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "thanos-discovery" }} +{{- $thanosPort := .Values.prometheus.thanosIngress.servicePort -}} +{{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }} +{{- $paths := .Values.prometheus.thanosIngress.paths | default $routePrefix -}} +{{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) "true" -}} +apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" . }} +kind: Ingress +metadata: +{{- if .Values.prometheus.thanosIngress.annotations }} + annotations: + {{- tpl (toYaml .Values.prometheus.thanosIngress.annotations) . | nindent 4 }} +{{- end }} + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-gateway + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.prometheus.thanosIngress.labels }} +{{ toYaml .Values.prometheus.thanosIngress.labels | indent 4 }} +{{- end }} +spec: + {{- if $apiIsStable }} + {{- if .Values.prometheus.thanosIngress.ingressClassName }} + ingressClassName: {{ .Values.prometheus.thanosIngress.ingressClassName }} + {{- end }} + {{- end }} + rules: + {{- if .Values.prometheus.thanosIngress.hosts }} + {{- range $host := .Values.prometheus.thanosIngress.hosts }} + - host: {{ tpl $host $ | quote }} # quoted: a wildcard host such as *.example.com would otherwise parse as a YAML alias + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $thanosPort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $thanosPort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $thanosPort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $thanosPort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- if .Values.prometheus.thanosIngress.tls }} + tls: +{{ tpl (toYaml .Values.prometheus.thanosIngress.tls | indent 4) .
}} + {{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressperreplica.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressperreplica.yaml new file mode 100644 index 0000000000..1d76d135c8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/ingressperreplica.yaml @@ -0,0 +1,67 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled .Values.prometheus.ingressPerReplica.enabled }} +{{- $pathType := .Values.prometheus.ingressPerReplica.pathType | default "" }} +{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}} +{{- $servicePort := .Values.prometheus.servicePerReplica.port -}} +{{- $ingressValues := .Values.prometheus.ingressPerReplica -}} +{{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) 
"true" -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-ingressperreplica + namespace: {{ template "kube-prometheus-stack.namespace" $ }} +items: +{{ range $i, $e := until $count }} + - kind: Ingress + apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" $ }} + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-prometheus + {{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $ingressValues.labels }} +{{ toYaml $ingressValues.labels | indent 8 }} + {{- end }} + {{- if $ingressValues.annotations }} + annotations: + {{- tpl (toYaml $ingressValues.annotations) $ | nindent 8 }} + {{- end }} + spec: + {{- if $apiIsStable }} + {{- if $ingressValues.ingressClassName }} + ingressClassName: {{ $ingressValues.ingressClassName }} + {{- end }} + {{- end }} + rules: + - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + http: + paths: + {{- range $p := $ingressValues.paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }} + tls: + - hosts: + - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + {{- if $ingressValues.tlsSecretPerReplica.enabled }} + secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }} + {{- else }} + secretName: {{ $ingressValues.tlsSecretName }} + {{- end }} + {{- end }} +{{- end 
-}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/networkpolicy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/networkpolicy.yaml new file mode 100644 index 0000000000..1296a79063 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/networkpolicy.yaml @@ -0,0 +1,34 @@ +{{- if and .Values.prometheus.networkPolicy.enabled (eq .Values.prometheus.networkPolicy.flavor "kubernetes") }} +apiVersion: {{ template "kube-prometheus-stack.prometheus.networkPolicy.apiVersion" . }} +kind: NetworkPolicy +metadata: + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + {{- if .Values.prometheus.networkPolicy.egress }} + egress: + {{- toYaml .Values.prometheus.networkPolicy.egress | nindent 4 }} + {{- end }} + {{- if .Values.prometheus.networkPolicy.ingress }} + ingress: + {{- toYaml .Values.prometheus.networkPolicy.ingress | nindent 4 }} + {{- end }} + policyTypes: + - Egress + - Ingress + podSelector: + {{- if .Values.prometheus.networkPolicy.podSelector }} + {{- toYaml .Values.prometheus.networkPolicy.podSelector | nindent 4 }} + {{- else }} + matchLabels: + {{- if .Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . 
}} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/nginx-config.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/nginx-config.yaml new file mode 100644 index 0000000000..e4d91f9a9e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/nginx-config.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-nginx-proxy-config + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.annotations }} + annotations: +{{ toYaml .Values.prometheus.annotations | indent 4 }} +{{- end }} +data: + nginx.conf: |- + worker_processes auto; + error_log /dev/stdout warn; + pid /var/cache/nginx/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + log_format main '[$time_local - $status] $remote_addr - $remote_user $request ($http_referer)'; + + proxy_connect_timeout 10; + proxy_read_timeout 180; + proxy_send_timeout 5; + proxy_buffering off; + proxy_cache_path /var/cache/nginx/cache levels=1:2 keys_zone=my_zone:100m inactive=1d max_size=10g; + + server { + listen 8081; + access_log off; + + gzip on; + gzip_min_length 1k; + gzip_comp_level 2; + gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript image/jpeg image/gif image/png; + gzip_vary on; + gzip_disable "MSIE [1-6]\."; + + proxy_set_header Host $host; + + location / { + proxy_cache my_zone; + proxy_cache_valid 200 302 1d; + proxy_cache_valid 301 30d; + proxy_cache_valid any 5m; + proxy_cache_bypass $http_cache_control; + add_header X-Proxy-Cache $upstream_cache_status; + add_header Cache-Control "public"; + + proxy_pass http://localhost:9090/; + + sub_filter_once off; + sub_filter 'var PATH_PREFIX = "";' 'var 
PATH_PREFIX = ".";'; + + if ($request_filename ~ .*\.(?:js|css|jpg|jpeg|gif|png|ico|cur|gz|svg|svgz|mp4|ogg|ogv|webm)$) { + expires 90d; + } + + rewrite ^/k8s/clusters/.*/proxy(.*) /$1 break; + + } + } + } diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podDisruptionBudget.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podDisruptionBudget.yaml new file mode 100644 index 0000000000..48f3f1f5a6 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podDisruptionBudget.yaml @@ -0,0 +1,25 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.podDisruptionBudget.enabled }} +apiVersion: {{ include "kube-prometheus-stack.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if .Values.prometheus.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.prometheus.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.prometheus.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.prometheus.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- if .Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . 
}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podmonitors.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podmonitors.yaml new file mode 100644 index 0000000000..4e748c23b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/podmonitors.yaml @@ -0,0 +1,38 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.additionalPodMonitors }} +apiVersion: v1 +kind: List +items: +{{- range .Values.prometheus.additionalPodMonitors }} + - apiVersion: monitoring.coreos.com/v1 + kind: PodMonitor + metadata: + name: {{ .name }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if .additionalLabels }} +{{ toYaml .additionalLabels | indent 8 }} + {{- end }} + spec: + {{- include "servicemonitor.scrapeLimits" . | nindent 6 }} + podMetricsEndpoints: +{{ toYaml .podMetricsEndpoints | indent 8 }} + {{- if .jobLabel }} + jobLabel: {{ .jobLabel }} + {{- end }} + {{- if .namespaceSelector }} + namespaceSelector: +{{ toYaml .namespaceSelector | indent 8 }} + {{- end }} + selector: +{{ toYaml .selector | indent 8 }} + {{- if .podTargetLabels }} + podTargetLabels: +{{ toYaml .podTargetLabels | indent 8 }} + {{- end }} + {{- if .sampleLimit }} + sampleLimit: {{ .sampleLimit }} + {{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/prometheus.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/prometheus.yaml new file mode 100644 index 0000000000..5c3c8d4d1f --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/prometheus.yaml @@ -0,0 +1,472 @@ +{{- if .Values.prometheus.enabled }} +{{- if .Values.prometheus.agentMode }} +apiVersion: monitoring.coreos.com/v1alpha1 +kind: PrometheusAgent +{{- else }} 
+apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +{{- end }} +metadata: + name: {{ template "kube-prometheus-stack.prometheus.crname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.annotations }} + annotations: +{{ toYaml .Values.prometheus.annotations | indent 4 }} +{{- end }} +spec: +{{- if and (not .Values.prometheus.agentMode) (or .Values.prometheus.prometheusSpec.alertingEndpoints .Values.alertmanager.enabled) }} + alerting: + alertmanagers: +{{- if .Values.prometheus.prometheusSpec.alertingEndpoints }} +{{ toYaml .Values.prometheus.prometheusSpec.alertingEndpoints | indent 6 }} +{{- else if .Values.alertmanager.enabled }} + - namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + port: {{ .Values.alertmanager.alertmanagerSpec.portName }} + {{- if .Values.alertmanager.alertmanagerSpec.routePrefix }} + pathPrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" + {{- end }} + {{- if .Values.alertmanager.alertmanagerSpec.scheme }} + scheme: {{ .Values.alertmanager.alertmanagerSpec.scheme }} + {{- end }} + {{- if .Values.alertmanager.alertmanagerSpec.tlsConfig }} + tlsConfig: +{{ toYaml .Values.alertmanager.alertmanagerSpec.tlsConfig | indent 10 }} + {{- end }} + apiVersion: {{ .Values.alertmanager.apiVersion }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.apiserverConfig }} + apiserverConfig: +{{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.image }} + {{- $registry := include "monitoring_registry" . 
| default .Values.prometheus.prometheusSpec.image.registry -}} + {{- if and .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" + {{- else if .Values.prometheus.prometheusSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.prometheus.prometheusSpec.image.repository }}@sha256:{{ .Values.prometheus.prometheusSpec.image.sha }}" + {{- else if .Values.prometheus.prometheusSpec.image.tag }} + image: "{{ $registry }}/{{ .Values.prometheus.prometheusSpec.image.repository }}:{{ .Values.prometheus.prometheusSpec.image.tag }}" + {{- else }} + image: "{{ $registry }}/{{ .Values.prometheus.prometheusSpec.image.repository }}" + {{- end }} + version: {{ default .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.version }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalArgs }} + additionalArgs: +{{ toYaml .Values.prometheus.prometheusSpec.additionalArgs | indent 4}} +{{- end -}} +{{- if .Values.prometheus.prometheusSpec.externalLabels }} + externalLabels: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.externalLabels | indent 4) . 
}} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.prometheusExternalLabelNameClear }} + prometheusExternalLabelName: "" +{{- else if .Values.prometheus.prometheusSpec.prometheusExternalLabelName }} + prometheusExternalLabelName: "{{ .Values.prometheus.prometheusSpec.prometheusExternalLabelName }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.replicaExternalLabelNameClear }} + replicaExternalLabelName: "" +{{- else if .Values.prometheus.prometheusSpec.replicaExternalLabelName }} + replicaExternalLabelName: "{{ .Values.prometheus.prometheusSpec.replicaExternalLabelName }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enableRemoteWriteReceiver }} + enableRemoteWriteReceiver: {{ .Values.prometheus.prometheusSpec.enableRemoteWriteReceiver }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.externalUrl }} + externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}" +{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }} + externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}" +{{- else if not (or (kindIs "invalid" .Values.global.cattle.url) (kindIs "invalid" .Values.global.cattle.clusterId)) }} + externalUrl: "{{ .Values.global.cattle.url }}/k8s/clusters/{{ .Values.global.cattle.clusterId }}/api/v1/namespaces/{{ template "kube-prometheus-stack.namespace" . }}/services/http:{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}/proxy" +{{- else }} + externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }} +{{- end }} + nodeSelector: {{ include "linux-node-selector" . 
| nindent 4 }} +{{- if .Values.prometheus.prometheusSpec.nodeSelector }} +{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.prometheus.prometheusSpec.paused }} + replicas: {{ .Values.prometheus.prometheusSpec.replicas }} + shards: {{ .Values.prometheus.prometheusSpec.shards }} + logLevel: {{ .Values.prometheus.prometheusSpec.logLevel }} + logFormat: {{ .Values.prometheus.prometheusSpec.logFormat }} + listenLocal: {{ .Values.prometheus.prometheusSpec.listenLocal }} +{{- if not .Values.prometheus.agentMode }} + enableAdminAPI: {{ .Values.prometheus.prometheusSpec.enableAdminAPI }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.web }} + web: +{{ toYaml .Values.prometheus.prometheusSpec.web | indent 4 }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.exemplars }} + exemplars: + {{ toYaml .Values.prometheus.prometheusSpec.exemplars | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enableFeatures }} + enableFeatures: +{{- range $enableFeatures := .Values.prometheus.prometheusSpec.enableFeatures }} + - {{ tpl $enableFeatures $ }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeInterval }} + scrapeInterval: {{ .Values.prometheus.prometheusSpec.scrapeInterval }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.prometheusSpec.scrapeTimeout }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.evaluationInterval }} + evaluationInterval: {{ .Values.prometheus.prometheusSpec.evaluationInterval }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.resources }} + resources: +{{ toYaml .Values.prometheus.prometheusSpec.resources | indent 4 }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} + retention: {{ .Values.prometheus.prometheusSpec.retention | quote }} +{{- if .Values.prometheus.prometheusSpec.retentionSize }} + 
retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.tsdb }} + tsdb: + {{- if .Values.prometheus.prometheusSpec.tsdb.outOfOrderTimeWindow }} + outOfOrderTimeWindow: {{ .Values.prometheus.prometheusSpec.tsdb.outOfOrderTimeWindow }} + {{- end }} +{{- end }} +{{- end }} +{{- if eq .Values.prometheus.prometheusSpec.walCompression false }} + walCompression: false +{{ else }} + walCompression: true +{{- end }} +{{- if .Values.prometheus.prometheusSpec.routePrefix }} + routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.secrets }} + secrets: +{{ toYaml .Values.prometheus.prometheusSpec.secrets | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.configMaps }} + configMaps: +{{ toYaml .Values.prometheus.prometheusSpec.configMaps | indent 4 }} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} +{{- if .Values.prometheus.prometheusSpec.serviceMonitorSelector }} + serviceMonitorSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.serviceMonitorSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues }} + serviceMonitorSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + serviceMonitorSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector }} + serviceMonitorNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector | indent 4) . }} +{{ else }} + serviceMonitorNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMonitorSelector }} + podMonitorSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMonitorSelector | indent 4) . 
}} +{{ else if .Values.prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues }} + podMonitorSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + podMonitorSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector }} + podMonitorNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector | indent 4) . }} +{{ else }} + podMonitorNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.probeSelector }} + probeSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.probeSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.probeSelectorNilUsesHelmValues }} + probeSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + probeSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.probeNamespaceSelector }} + probeNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.probeNamespaceSelector | indent 4) . }} +{{ else }} + probeNamespaceSelector: {} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) (or .Values.prometheus.prometheusSpec.remoteRead .Values.prometheus.prometheusSpec.additionalRemoteRead) }} + remoteRead: +{{- if .Values.prometheus.prometheusSpec.remoteRead }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.remoteRead | indent 4) . }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteRead }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteRead | indent 4 }} +{{- end }} +{{- end }} +{{- if (or .Values.prometheus.prometheusSpec.remoteWrite .Values.prometheus.prometheusSpec.additionalRemoteWrite) }} + remoteWrite: +{{- if .Values.prometheus.prometheusSpec.remoteWrite }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.remoteWrite | indent 4) . 
}} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteWrite }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteWrite | indent 4 }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.securityContext }} + securityContext: +{{ toYaml .Values.prometheus.prometheusSpec.securityContext | indent 4 }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if .Values.prometheus.prometheusSpec.ruleNamespaceSelector }} + ruleNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.ruleNamespaceSelector | indent 4) . }} +{{ else }} + ruleNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.ruleSelector }} + ruleSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.ruleSelector | indent 4) . }} +{{- else if .Values.prometheus.prometheusSpec.ruleSelectorNilUsesHelmValues }} + ruleSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + ruleSelector: {} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeConfigSelector }} + scrapeConfigSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.scrapeConfigSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.scrapeConfigSelectorNilUsesHelmValues }} + scrapeConfigSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + scrapeConfigSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeConfigNamespaceSelector }} + scrapeConfigNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.scrapeConfigNamespaceSelector | indent 4) . }} +{{ else }} + scrapeConfigNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.storageSpec }} + storage: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.storageSpec | indent 4) . }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMetadata }} + podMetadata: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMetadata | indent 4) . 
}} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.query }} + query: +{{ toYaml .Values.prometheus.prometheusSpec.query | indent 4}} +{{- end }} +{{- if or .Values.prometheus.prometheusSpec.podAntiAffinity .Values.prometheus.prometheusSpec.affinity }} + affinity: +{{- if .Values.prometheus.prometheusSpec.affinity }} +{{ toYaml .Values.prometheus.prometheusSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.prometheus.prometheusSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [prometheus]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.prometheus.crname" . }}]} +{{- else if eq .Values.prometheus.prometheusSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [prometheus]} + - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.prometheus.crname" . }}]} +{{- end }} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . | nindent 4 }} +{{- if .Values.prometheus.prometheusSpec.tolerations }} +{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.prometheus.prometheusSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . 
| trim | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalScrapeConfigs }} + additionalScrapeConfigs: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-scrape-confg + key: additional-scrape-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.enabled }} + additionalScrapeConfigs: + name: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.name }} + key: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.key }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if or .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }} + additionalAlertManagerConfigs: +{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }} + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg + key: additional-alertmanager-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }} + name: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.name }} + key: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.key }} + {{- if hasKey .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret "optional" }} + optional: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.optional }} + {{- end }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }} + additionalAlertRelabelConfigs: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-prometheus-am-relabel-confg + key: additional-alert-relabel-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret }} + additionalAlertRelabelConfigs: + name: {{ .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret.name }} + key: {{ .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret.key }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.containers }} + containers: +{{ tpl .Values.prometheus.prometheusSpec.containers $ | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.initContainers }} + initContainers: +{{ toYaml .Values.prometheus.prometheusSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.priorityClassName }} + priorityClassName: {{ .Values.prometheus.prometheusSpec.priorityClassName }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if .Values.prometheus.prometheusSpec.thanos }} + thanos: +{{- with (omit .Values.prometheus.prometheusSpec.thanos "objectStorageConfig")}} +{{ toYaml . | indent 4 }} +{{- end }} +{{- if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).existingSecret) }} + objectStorageConfig: + key: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.key }}" + name: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.name }}" +{{- else if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).secret) }} + objectStorageConfig: + key: object-storage-configs.yaml + name: {{ template "kube-prometheus-stack.fullname" . 
}}-prometheus +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.disableCompaction }} + disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }} +{{- end }} +{{- end }} + portName: {{ .Values.prometheus.prometheusSpec.portName }} +{{- if .Values.prometheus.prometheusSpec.volumes }} + volumes: +{{ toYaml .Values.prometheus.prometheusSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.prometheus.prometheusSpec.volumeMounts | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.arbitraryFSAccessThroughSMs }} + arbitraryFSAccessThroughSMs: +{{ toYaml .Values.prometheus.prometheusSpec.arbitraryFSAccessThroughSMs | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.overrideHonorLabels }} + overrideHonorLabels: {{ .Values.prometheus.prometheusSpec.overrideHonorLabels }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.overrideHonorTimestamps }} + overrideHonorTimestamps: {{ .Values.prometheus.prometheusSpec.overrideHonorTimestamps }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + ignoreNamespaceSelectors: {{ .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }} + enforcedNamespaceLabel: {{ .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }} +{{- $prometheusDefaultRulesExcludedFromEnforce := (include "rules.names" .) | fromYaml }} +{{- if not .Values.prometheus.agentMode }} + prometheusRulesExcludedFromEnforce: +{{- range $prometheusDefaultRulesExcludedFromEnforce.rules }} + - ruleNamespace: "{{ template "kube-prometheus-stack.namespace" $ }}" + ruleName: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . 
| trunc 63 | trimSuffix "-" }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.prometheusRulesExcludedFromEnforce }} +{{ toYaml .Values.prometheus.prometheusSpec.prometheusRulesExcludedFromEnforce | indent 4 }} +{{- end }} +{{- end }} + excludedFromEnforcement: +{{- range $prometheusDefaultRulesExcludedFromEnforce.rules }} + - group: monitoring.coreos.com + resource: prometheusrules + namespace: "{{ template "kube-prometheus-stack.namespace" $ }}" + name: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.excludedFromEnforcement }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4) . }} +{{- end }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.queryLogFile }} + queryLogFile: {{ .Values.prometheus.prometheusSpec.queryLogFile }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.sampleLimit }} + sampleLimit: {{ .Values.prometheus.prometheusSpec.sampleLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedKeepDroppedTargets }} + enforcedKeepDroppedTargets: {{ .Values.prometheus.prometheusSpec.enforcedKeepDroppedTargets }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedSampleLimit }} + enforcedSampleLimit: {{ .Values.prometheus.prometheusSpec.enforcedSampleLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedTargetLimit }} + enforcedTargetLimit: {{ .Values.prometheus.prometheusSpec.enforcedTargetLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelLimit }} + enforcedLabelLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelNameLengthLimit }} + enforcedLabelNameLengthLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelNameLengthLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelValueLengthLimit}} + 
enforcedLabelValueLengthLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelValueLengthLimit }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} + allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.minReadySeconds }} + minReadySeconds: {{ .Values.prometheus.prometheusSpec.minReadySeconds }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.maximumStartupDurationSeconds }} + maximumStartupDurationSeconds: {{ .Values.prometheus.prometheusSpec.maximumStartupDurationSeconds }} +{{- end }} + hostNetwork: {{ .Values.prometheus.prometheusSpec.hostNetwork }} +{{- if .Values.prometheus.prometheusSpec.hostAliases }} + hostAliases: +{{ toYaml .Values.prometheus.prometheusSpec.hostAliases | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.tracingConfig }} + tracingConfig: +{{ toYaml .Values.prometheus.prometheusSpec.tracingConfig | indent 4 }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.additionalConfig }} + {{- tpl (toYaml .) $ | nindent 2 }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.additionalConfigString }} + {{- tpl . $ | nindent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrole.yaml new file mode 100644 index 0000000000..71476cd18b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrole.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.prometheus.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-prometheus-psp + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +rules: +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} +- apiGroups: ['policy'] +{{- else }} +- apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-prometheus-stack.fullname" . }}-prometheus +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrolebinding.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrolebinding.yaml new file mode 100644 index 0000000000..a393928c78 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp-clusterrolebinding.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.prometheus.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-psp + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-psp +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp.yaml new file mode 100644 index 0000000000..62d3854151 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/psp.yaml @@ -0,0 +1,59 @@ +{{- if and .Values.prometheus.enabled (or .Values.global.cattle.psp.enabled (and .Values.global.rbac.create .Values.global.rbac.pspEnabled)) }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{- /* Shared labels are emitted before the optional annotations block so they always render under metadata.labels */}} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.global.rbac.pspAnnotations }} + annotations: +{{ toYaml .Values.global.rbac.pspAnnotations | indent 4 }} +{{- end }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' +{{- if .Values.prometheus.podSecurityPolicy.volumes }} +{{ toYaml .Values.prometheus.podSecurityPolicy.volumes | indent 4 }} +{{- end }} + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group.
+ - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- if .Values.prometheus.podSecurityPolicy.allowedCapabilities }} + allowedCapabilities: +{{ toYaml .Values.prometheus.podSecurityPolicy.allowedCapabilities | indent 4 }} +{{- end }} +{{- if .Values.prometheus.podSecurityPolicy.allowedHostPaths }} + allowedHostPaths: +{{ toYaml .Values.prometheus.podSecurityPolicy.allowedHostPaths | indent 4 }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/alertmanager.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/alertmanager.rules.yaml new file mode 100644 index 0000000000..b66f052ade --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/alertmanager.rules.yaml @@ -0,0 +1,305 @@ +{{- /* +Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/alertmanager-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }} +{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) 
"alertmanager.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: alertmanager.rules + rules: +{{- if not (.Values.defaultRules.disabled.AlertmanagerFailedReload | default false) }} + - alert: AlertmanagerFailedReload + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Configuration has failed to load for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerfailedreload + summary: Reloading an Alertmanager configuration has failed. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) == 0 + for: {{ dig "AlertmanagerFailedReload" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerFailedReload" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerMembersInconsistent | default false) }} + - alert: AlertmanagerMembersInconsistent + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} has only found {{`{{`}} $value {{`}}`}} members of the {{`{{`}}$labels.job{{`}}`}} cluster. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagermembersinconsistent + summary: A member of an Alertmanager cluster has not found all other cluster members. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) + < on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) group_left + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m])) + for: {{ dig "AlertmanagerMembersInconsistent" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerMembersInconsistent" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerFailedToSendAlerts | default false) }} + - alert: AlertmanagerFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerfailedtosendalerts + summary: An Alertmanager instance failed to send notifications. 
+ expr: |- + ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) + ) + > 0.01 + for: {{ dig "AlertmanagerFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerFailedToSendAlerts" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterFailedToSendAlerts | default false) }} + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. + expr: |- + min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration=~`.*`}[5m]) + ) + > 0.01 + for: {{ dig "AlertmanagerClusterFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterFailedToSendAlerts" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterFailedToSendAlerts | default false) }} + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. + expr: |- + min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}", integration!~`.*`}[5m]) + ) + > 0.01 + for: {{ dig "AlertmanagerClusterFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterFailedToSendAlerts" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerConfigInconsistent | default false) }} + - alert: AlertmanagerConfigInconsistent + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have different configurations. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerconfiginconsistent + summary: Alertmanager instances within the same cluster have different configurations. + expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + count_values by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) + ) + != 1 + for: {{ dig "AlertmanagerConfigInconsistent" "for" "20m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerConfigInconsistent" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterDown | default false) }} + - alert: AlertmanagerClusterDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have been up for less than half of the last 5m.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterdown + summary: Half or more of the Alertmanager instances within the same cluster are down. + expr: |- + ( + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + avg_over_time(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[5m]) < 0.5 + ) + / + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service,cluster) ( + up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} + ) + ) + >= 0.5 + for: {{ dig "AlertmanagerClusterDown" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterCrashlooping | default false) }} + - alert: AlertmanagerClusterCrashlooping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have restarted at least 5 times in the last 10m.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclustercrashlooping + summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. + expr: |- + ( + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + changes(process_start_time_seconds{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}[10m]) > 4 + ) + / + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace,service,cluster) ( + up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} + ) + ) + >= 0.5 + for: {{ dig "AlertmanagerClusterCrashlooping" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterCrashlooping" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/config-reloaders.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/config-reloaders.yaml new file mode 100644 index 0000000000..8416d6df40 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/config-reloaders.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'config-reloaders' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/prometheusOperator-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.configReloaders }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "config-reloaders" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: config-reloaders + rules: +{{- if not (.Values.defaultRules.disabled.ConfigReloaderSidecarErrors | default false) }} + - alert: ConfigReloaderSidecarErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.configReloaders }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.configReloaders | indent 8 }} +{{- end }} + description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace. + + As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.' 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/configreloadersidecarerrors + summary: config-reloader sidecar has not had a successful reload for 10m + expr: max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0 + for: {{ dig "ConfigReloaderSidecarErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "ConfigReloaderSidecarErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.configReloaders }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.configReloaders }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/etcd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/etcd.yaml new file mode 100644 index 0000000000..a1d7a508f8 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/etcd.yaml @@ -0,0 +1,461 @@ +{{- /* +Generated from 'etcd' group from https://github.com/etcd-io/etcd.git +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.etcd }} +{{- if (include "exporter.kubeEtcd.enabled" .)}} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: etcd + rules: +{{- if not (.Values.defaultRules.disabled.etcdMembersDown | default false) }} + - alert: etcdMembersDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).' + summary: etcd cluster members are down. 
+ expr: |- + max without (endpoint) ( + sum without (instance) (up{job=~".*etcd.*"} == bool 0) + or + count without (To) ( + sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 + ) + ) + > 0 + for: {{ dig "etcdMembersDown" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdMembersDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdInsufficientMembers | default false) }} + - alert: etcdInsufficientMembers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' + summary: etcd cluster has insufficient number of members. + expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) + for: {{ dig "etcdInsufficientMembers" "for" "3m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "etcdInsufficientMembers" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdNoLeader | default false) }} + - alert: etcdNoLeader + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.' + summary: etcd cluster has no leader. + expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 + for: {{ dig "etcdNoLeader" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdNoLeader" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfLeaderChanges | default false) }} + - alert: etcdHighNumberOfLeaderChanges + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' + summary: etcd cluster has high number of leader changes. + expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 + for: {{ dig "etcdHighNumberOfLeaderChanges" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfLeaderChanges" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedGRPCRequests | default false) }} + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 1 + for: {{ dig "etcdHighNumberOfFailedGRPCRequests" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedGRPCRequests" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedGRPCRequests | default false) }} + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 5 + for: {{ dig "etcdHighNumberOfFailedGRPCRequests" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedGRPCRequests" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdGRPCRequestsSlow | default false) }} + - alert: etcdGRPCRequestsSlow + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.' + summary: etcd grpc requests are slow + expr: |- + histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) + > 0.15 + for: {{ dig "etcdGRPCRequestsSlow" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdGRPCRequestsSlow" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdMemberCommunicationSlow | default false) }} + - alert: etcdMemberCommunicationSlow + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster member communication is slow. + expr: |- + histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.15 + for: {{ dig "etcdMemberCommunicationSlow" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdMemberCommunicationSlow" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedProposals | default false) }} + - alert: etcdHighNumberOfFailedProposals + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of proposal failures. + expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 + for: {{ dig "etcdHighNumberOfFailedProposals" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedProposals" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighFsyncDurations | default false) }} + - alert: etcdHighFsyncDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.5 + for: {{ dig "etcdHighFsyncDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighFsyncDurations" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighFsyncDurations | default false) }} + - alert: etcdHighFsyncDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 1 + for: {{ dig "etcdHighFsyncDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighFsyncDurations" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighCommitDurations | default false) }} + - alert: etcdHighCommitDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile commit durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.25 + for: {{ dig "etcdHighCommitDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighCommitDurations" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdDatabaseQuotaLowSpace | default false) }} + - alert: etcdDatabaseQuotaLowSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' + summary: etcd cluster database is running full. + expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 + for: {{ dig "etcdDatabaseQuotaLowSpace" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdDatabaseQuotaLowSpace" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdExcessiveDatabaseGrowth | default false) }} + - alert: etcdExcessiveDatabaseGrowth + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.' + summary: etcd cluster database growing very fast. + expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} + for: {{ dig "etcdExcessiveDatabaseGrowth" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdExcessiveDatabaseGrowth" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdDatabaseHighFragmentationRatio | default false) }} + - alert: etcdDatabaseHighFragmentationRatio + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size in use on instance {{`{{`}} $labels.instance {{`}}`}} is {{`{{`}} $value | humanizePercentage {{`}}`}} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' + runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation + summary: etcd database size in use is less than 50% of the actual allocated storage. + expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 + for: {{ dig "etcdDatabaseHighFragmentationRatio" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdDatabaseHighFragmentationRatio" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/general.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/general.rules.yaml new file mode 100644 index 0000000000..8aca0b85f5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/general.rules.yaml @@ -0,0 +1,125 @@ +{{- /* +Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubePrometheus-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: general.rules + rules: +{{- if not (.Values.defaultRules.disabled.TargetDown | default false) }} + - alert: TargetDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/targetdown + summary: One or more targets are unreachable. + expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up) BY (cluster, job, namespace, service)) > 10 + for: {{ dig "TargetDown" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "TargetDown" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.Watchdog | default false) }} + - alert: Watchdog + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. + + This alert is always firing, therefore it should always be firing in Alertmanager + + and always fire against a receiver. There are integrations with various notification + + mechanisms that send a notification when this alert is not firing. For example the + + "DeadMansSnitch" integration in PagerDuty. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/watchdog + summary: An alert that should always be firing to certify that Alertmanager is working properly. + expr: vector(1) + labels: + severity: {{ dig "Watchdog" "severity" "none" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.InfoInhibitor | default false) }} + - alert: InfoInhibitor + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: 'This is an alert that is used to inhibit info alerts. 
+ + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + + other alerts. + + This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a + + severity of ''warning'' or ''critical'' starts firing on the same namespace. + + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/infoinhibitor + summary: Info-level alert inhibition. + expr: ALERTS{severity = "info"} == 1 unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 + labels: + severity: {{ dig "InfoInhibitor" "severity" "none" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml new file mode 100644 index 0000000000..9de5f5bc9c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml @@ -0,0 +1,43 @@ +{{- /* +Generated from 'k8s.rules.container-cpu-usage-seconds-total' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerCpuUsageSecondsTotal }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-cpu-usage-seconds-total" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_cpu_usage_seconds_total + rules: + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml new file mode 100644 index 0000000000..323f41f9cb --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml @@ -0,0 +1,42 @@ +{{- /* +Generated from 'k8s.rules.container-memory-cache' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryCache }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-cache" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_cache + rules: + - expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryCache }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryCache }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml new file mode 100644 index 0000000000..312d73c889 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml @@ -0,0 +1,42 @@ +{{- /* +Generated from 'k8s.rules.container-memory-rss' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryRss }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-rss" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_rss + rules: + - expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRss }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRss }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml new file mode 100644 index 0000000000..136595e801 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml @@ -0,0 +1,42 @@ +{{- /* +Generated from 'k8s.rules.container-memory-swap' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemorySwap }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-swap" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_swap + rules: + - expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemorySwap }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemorySwap }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml new file mode 100644 index 0000000000..d308b7473a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml @@ -0,0 +1,42 @@ +{{- /* +Generated from 'k8s.rules.container-memory-working-set-bytes' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryWorkingSetBytes }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-working-set-bytes" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_working_set_bytes + rules: + - expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryWorkingSetBytes }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryWorkingSetBytes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml new file mode 100644 index 0000000000..2d896e59e4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml @@ -0,0 +1,168 @@ +{{- /* +Generated from 'k8s.rules.container-resource' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerResource }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-resource" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_resource + rules: + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml new file mode 100644 index 0000000000..4915b25e73 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml @@ -0,0 +1,107 @@ +{{- /* +Generated from 'k8s.rules.pod-owner' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sPodOwner }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.pod-owner" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.pod_owner + rules: + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}replicaset, namespace) group_left(owner_name) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}replicaset, namespace) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="{{ $kubeStateMetricsJob }}"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.yaml new file mode 100644 index 0000000000..c61bd222ab --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/k8s.rules.yaml @@ -0,0 +1,237 @@ +{{- /* +Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules + rules: + - expr: |- + sum by (cluster, namespace, pod, container) ( + irate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . 
}}", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( + 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (cluster, namespace, pod) 
group_left(node) topk by(cluster, namespace, pod) (1, + max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + 
kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster) + group_left() max by (namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + sum by (namespace, cluster) ( + sum by (namespace, 
pod, cluster) ( + max by (namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} + ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum + {{- if .Values.defaultRules.additionalRuleLabels }} + labels: + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( + 1, max by (replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="kube-state-metrics"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + record: 
namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by (cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + {{- if .Values.defaultRules.additionalRuleLabels }} + {{ toYaml .Values.defaultRules.additionalRuleLabels | nindent 8 }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml new file mode 100644 index 0000000000..6194e9c614 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml @@ -0,0 +1,273 @@ +{{- /* +Generated from 'kube-apiserver-availability.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverAvailability }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-availability.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - interval: 3m + name: kube-apiserver-availability.rules + rules: + - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 + record: code_verb:apiserver_request_total:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code:apiserver_request_total:increase30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code:apiserver_request_total:increase30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job="apiserver"}[1h])) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - ( + ( + # write too slow + sum by (cluster) (cluster_verb_scope:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + ( + # read too slow + sum by (cluster) (cluster_verb_scope:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + ( + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + ) + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d) + labels: + verb: all + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: |- + 1 - ( + sum by (cluster) (cluster_verb_scope:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + # too slow + ( + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="read"}) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: |- + 1 - ( + ( + # too slow + sum by (cluster) (cluster_verb_scope:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="write"}) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: code_resource:apiserver_request_total:rate5m + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code_resource:apiserver_request_total:rate5m + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml new file mode 100644 index 0000000000..e6666a6f41 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml @@ -0,0 +1,440 @@ +{{- /* +Generated from 'kube-apiserver-burnrate.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverBurnrate }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-burnrate.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-burnrate.rules + rules: + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + + + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate6h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + sum by (cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow (grouped by the same labels as the error terms below so the vectors match) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate6h +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml new file mode 100644 index 0000000000..d145341952 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml @@ -0,0 +1,53 @@ +{{- /* +Generated from 'kube-apiserver-histogram.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverHistogram }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-histogram.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-histogram.rules + rules: + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request{{ if (semverCompare ">=1.23.0-0" $kubeTargetVersion) }}_slo{{ end }}_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml new file mode 100644 index 0000000000..30ef9a4293 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml @@ -0,0 +1,159 @@ +{{- /* +Generated from 'kube-apiserver-slos' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverSlos }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-slos" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-slos + rules: +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) + and + sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "2m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 1h + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "critical" .Values.customRules }} + short: 5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) + and + sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 6h + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "critical" .Values.customRules }} + short: 30m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) + and + sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 1d + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "warning" .Values.customRules }} + short: 2h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) + and + sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "3h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 3d + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "warning" .Values.customRules }} + short: 6h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml new file mode 100644 index 0000000000..fcf35f389b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml @@ -0,0 +1,49 @@ +{{- /* +Generated from 'kube-prometheus-general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubePrometheus-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusGeneral }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-general.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-prometheus-general.rules + rules: + - expr: count without(instance, pod, node) (up == 1) + record: count:up1 + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: count without(instance, pod, node) (up == 0) + record: count:up0 + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml new file mode 100644 index 0000000000..7a0d202324 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml @@ -0,0 +1,93 @@ +{{- /* +Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubePrometheus-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) + record: instance:node_cpu:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) + record: cluster:node_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml new file mode 100644 index 0000000000..c9d61ce37b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml @@ -0,0 +1,135 @@ +{{- /* +Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeSchedulerRecording }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-scheduler.rules + rules: + - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . 
}}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . 
}}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . 
}}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-state-metrics.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-state-metrics.yaml new file mode 100644 index 0000000000..d1ad3cae5e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kube-state-metrics.yaml @@ -0,0 +1,152 @@ +{{- /* +Generated from 'kube-state-metrics' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubeStateMetrics-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeStateMetrics }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-state-metrics" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-state-metrics + rules: +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsListErrors | default false) }} + - alert: KubeStateMetricsListErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: |- + (sum(rate(kube_state_metrics_list_total{job="{{ $kubeStateMetricsJob }}",result="error"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + / + sum(rate(kube_state_metrics_list_total{job="{{ $kubeStateMetricsJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) + > 0.01 + for: {{ dig "KubeStateMetricsListErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsListErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsWatchErrors | default false) }} + - alert: KubeStateMetricsWatchErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. + expr: |- + (sum(rate(kube_state_metrics_watch_total{job="{{ $kubeStateMetricsJob }}",result="error"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + / + sum(rate(kube_state_metrics_watch_total{job="{{ $kubeStateMetricsJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) + > 0.01 + for: {{ dig "KubeStateMetricsWatchErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsWatchErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsShardingMismatch | default false) }} + - alert: KubeStateMetricsShardingMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: stdvar (kube_state_metrics_total_shards{job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) != 0 + for: {{ dig "KubeStateMetricsShardingMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsShardingMismatch" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsShardsMissing | default false) }} + - alert: KubeStateMetricsShardsMissing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: |- + 2^max(kube_state_metrics_total_shards{job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - 1 + - + sum( 2 ^ max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="{{ $kubeStateMetricsJob }}"}) ) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + != 0 + for: {{ dig "KubeStateMetricsShardsMissing" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsShardsMissing" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubelet.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubelet.rules.yaml new file mode 100644 index 0000000000..39fdddf3fe --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubelet.rules.yaml @@ -0,0 +1,65 @@ +{{- /* +Generated from 'kubelet.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +Renders a PrometheusRule with one group, kubelet.rules, holding three recording rules. All three record node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile and differ only in the quantile label (0.99, 0.9, 0.5): each takes histogram_quantile over the 5m rate of kubelet_pleg_relist_duration_seconds_bucket and joins kubelet_node_name with group_left(node) to carry the node label. defaultRules.additionalAggregationLabels are prepended to the by/on label lists; defaultRules.additionalRuleLabels and defaultRules.additionalRuleGroupLabels.kubelet are appended to each rule's labels. Nothing is rendered unless the target Kubernetes version (kubeTargetVersionOverride, else the cluster GitVersion) is >=1.14.0-0 and <9.9.9-9, defaultRules.create and defaultRules.rules.kubelet are truthy, and the exporter.kubelet.enabled helper returns a non-empty string. */ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubelet }} +{{- if (include "exporter.kubelet.enabled" .)}} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubelet.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubelet.rules + rules: + - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile +{{- end }}{{- /* closes: if exporter.kubelet.enabled */}} +{{- end }}{{- /* closes: Kubernetes version and defaultRules gate */}} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-apps.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-apps.yaml new file mode 100644 index 0000000000..2a861a522c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-apps.yaml @@ -0,0 +1,568 @@ +{{- /* +Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-apps + rules: +{{- if not (.Values.defaultRules.disabled.KubePodCrashLooping | default false) }} + - alert: KubePodCrashLooping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping + summary: Pod is crash looping. + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m]) >= 1 + for: {{ dig "KubePodCrashLooping" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePodCrashLooping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePodNotReady | default false) }} + - alert: KubePodNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready + summary: Pod has been in a non-ready state for more than 15 minutes. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown|Failed"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left(owner_kind) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 + for: {{ dig "KubePodNotReady" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubePodNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentGenerationMismatch | default false) }} + - alert: KubeDeploymentGenerationMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch + summary: Deployment generation mismatch due to possible roll-back + expr: |- + kube_deployment_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_deployment_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + for: {{ dig "KubeDeploymentGenerationMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentGenerationMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentReplicasMismatch | default false) }} + - alert: KubeDeploymentReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch + summary: Deployment has not matched the expected number of replicas. + expr: |- + ( + kube_deployment_spec_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + > + kube_deployment_status_replicas_available{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[10m]) + == + 0 + ) + for: {{ dig "KubeDeploymentReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentRolloutStuck | default false) }} + - alert: KubeDeploymentRolloutStuck + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentrolloutstuck + summary: Deployment rollout is not progressing. + expr: |- + kube_deployment_status_condition{condition="Progressing", status="false",job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != 0 + for: {{ dig "KubeDeploymentRolloutStuck" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentRolloutStuck" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetReplicasMismatch | default false) }} + - alert: KubeStatefulSetReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch + summary: StatefulSet has not matched the expected number of replicas. + expr: |- + ( + kube_statefulset_status_replicas_ready{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_statefulset_status_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[10m]) + == + 0 + ) + for: {{ dig "KubeStatefulSetReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetGenerationMismatch | default false) }} + - alert: KubeStatefulSetGenerationMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch + summary: StatefulSet generation mismatch due to possible roll-back + expr: |- + kube_statefulset_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_statefulset_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + for: {{ dig "KubeStatefulSetGenerationMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetGenerationMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut | default false) }} + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout + summary: StatefulSet update has not been rolled out. + expr: |- + ( + max without (revision) ( + kube_statefulset_status_current_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + unless + kube_statefulset_status_update_revision{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) + * + ( + kube_statefulset_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) + ) and ( + changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m]) + == + 0 + ) + for: {{ dig "KubeStatefulSetUpdateNotRolledOut" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetUpdateNotRolledOut" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetRolloutStuck | default false) }} + - alert: KubeDaemonSetRolloutStuck + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck + summary: DaemonSet rollout is stuck. + expr: |- + ( + ( + kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) or ( + kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + 0 + ) or ( + kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) or ( + kube_daemonset_status_number_available{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + ) + ) and ( + changes(kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[5m]) + == + 0 + ) + for: {{ dig 
"KubeDaemonSetRolloutStuck" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetRolloutStuck" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeContainerWaiting | default false) }} + - alert: KubeContainerWaiting + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting + summary: Pod container waiting longer than 1 hour + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}) > 0 + for: {{ dig "KubeContainerWaiting" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeContainerWaiting" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetNotScheduled | default false) }} + - alert: KubeDaemonSetNotScheduled + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled + summary: DaemonSet pods are not scheduled. + expr: |- + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + - + kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeDaemonSetNotScheduled" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetNotScheduled" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetMisScheduled | default false) }} + - alert: KubeDaemonSetMisScheduled + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled + summary: DaemonSet pods are misscheduled. + expr: kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeDaemonSetMisScheduled" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetMisScheduled" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeJobNotCompleted | default false) }} + - alert: KubeJobNotCompleted + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted + summary: Job did not complete in time + expr: |- + time() - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, job_name, cluster) (kube_job_status_start_time{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + and + kube_job_status_active{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0) > 43200 + labels: + severity: {{ dig "KubeJobNotCompleted" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeJobFailed | default false) }} + - alert: KubeJobFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed + summary: Job failed to complete. + expr: kube_job_failed{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeJobFailed" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeJobFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeHpaReplicasMismatch | default false) }} + - alert: KubeHpaReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch + summary: HPA has not matched desired number of replicas. + expr: |- + (kube_horizontalpodautoscaler_status_desired_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + != + kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + > + kube_horizontalpodautoscaler_spec_min_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + < + kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}) + and + changes(kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"}[15m]) == 0 + for: {{ dig "KubeHpaReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeHpaReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeHpaMaxedOut | default false) }} + - alert: KubeHpaMaxedOut + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout + summary: HPA is running at max replicas + expr: |- + kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + == + kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace=~"{{ $targetNamespace }}"} + for: {{ dig "KubeHpaMaxedOut" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeHpaMaxedOut" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-resources.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-resources.yaml new file mode 100644 index 0000000000..1d32f9bbad --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-resources.yaml @@ -0,0 +1,282 @@ +{{- /* +Generated from 'kubernetes-resources' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-resources + rules: +{{- if not (.Values.defaultRules.disabled.KubeCPUOvercommit | default false) }} + - alert: KubeCPUOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} $value {{`}}`}} CPU shares and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(namespace_cpu:kube_pod_container_resource_requests:sum{job="{{ $kubeStateMetricsJob }}",}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0 + and + (sum(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster)) > 0 + for: {{ dig "KubeCPUOvercommit" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeCPUOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeMemoryOvercommit | default false) }} + - alert: KubeMemoryOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - (sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster)) > 0 + and + (sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - max(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) > 0 + for: {{ dig "KubeMemoryOvercommit" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeMemoryOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeCPUQuotaOvercommit | default false) }} + - alert: KubeCPUQuotaOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuquotaovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(cpu|requests.cpu)"})) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster) + / + sum(kube_node_status_allocatable{resource="cpu", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + > 1.5 + for: {{ dig "KubeCPUQuotaOvercommit" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeCPUQuotaOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeMemoryQuotaOvercommit | default false) }} + - alert: KubeMemoryQuotaOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryquotaovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(memory|requests.memory)"})) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + / + sum(kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster) + > 1.5 + for: {{ dig "KubeMemoryQuotaOvercommit" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeMemoryQuotaOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaAlmostFull | default false) }} + - alert: KubeQuotaAlmostFull + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: |- + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0) + > 0.9 < 1 + for: {{ dig "KubeQuotaAlmostFull" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaAlmostFull" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaFullyUsed | default false) }} + - alert: KubeQuotaFullyUsed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotafullyused + summary: Namespace quota is fully used. + expr: |- + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0) + == 1 + for: {{ dig "KubeQuotaFullyUsed" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaFullyUsed" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaExceeded | default false) }} + - alert: KubeQuotaExceeded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: |- + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} > 0) + > 1 + for: {{ dig "KubeQuotaExceeded" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaExceeded" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.CPUThrottlingHigh | default false) }} + - alert: CPUThrottlingHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/cputhrottlinghigh + summary: Processes experience elevated CPU throttling. + expr: |- + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, container, pod, namespace) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, container, pod, namespace) + > ( 25 / 100 ) + for: {{ dig "CPUThrottlingHigh" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "CPUThrottlingHigh" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-storage.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-storage.yaml new file mode 100644 index 0000000000..b988445653 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-storage.yaml @@ -0,0 +1,216 @@ +{{- /* +Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-storage + rules: +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeFillingUp | default false) }} + - alert: KubePersistentVolumeFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + < 0.03 + and + kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + unless on(namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeFillingUp" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeFillingUp | default false) }} + - alert: KubePersistentVolumeFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + ( + kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . 
}}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_used_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on(namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeInodesFillingUp | default false) }} + - alert: KubePersistentVolumeInodesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_inodes_used{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + unless on(namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeInodesFillingUp" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeInodesFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeInodesFillingUp | default false) }} + - alert: KubePersistentVolumeInodesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_inodes_used{job="{{ include "exporter.kubelet.jobName" . 
}}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_inodes_free{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on(namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeInodesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeInodesFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeErrors | default false) }} + - alert: KubePersistentVolumeErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeerrors + summary: PersistentVolume is having issues with provisioning. + expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="{{ $kubeStateMetricsJob }}"} > 0 + for: {{ dig "KubePersistentVolumeErrors" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml new file mode 100644 index 0000000000..af34a23f88 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml @@ -0,0 +1,193 @@ +{{- /* +Generated from 'kubernetes-system-apiserver' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-apiserver" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-apiserver + rules: +{{- if not (.Values.defaultRules.disabled.KubeClientCertificateExpiration | default false) }} + - alert: KubeClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. 
+ expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job) histogram_quantile(0.01, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeClientCertificateExpiration | default false) }} + - alert: KubeClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}job) histogram_quantile(0.01, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAggregatedAPIErrors | default false) }} + - alert: KubeAggregatedAPIErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapierrors + summary: Kubernetes aggregated API has reported errors. + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m])) > 4 + labels: + severity: {{ dig "KubeAggregatedAPIErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAggregatedAPIDown | default false) }} + - alert: KubeAggregatedAPIDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapidown + summary: Kubernetes aggregated API is down. + expr: (1 - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m]))) * 100 < 85 + for: {{ dig "KubeAggregatedAPIDown" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "KubeAggregatedAPIDown" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if .Values.kubeApiServer.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeAPIDown | default false) }} + - alert: KubeAPIDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: KubeAPI has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapidown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="apiserver"} == 1) + for: {{ dig "KubeAPIDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAPIDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPITerminatedRequests | default false) }} + - alert: KubeAPITerminatedRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiterminatedrequests + summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. + expr: sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20 + for: {{ dig "KubeAPITerminatedRequests" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAPITerminatedRequests" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml new file mode 100644 index 0000000000..205bd59800 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml @@ -0,0 +1,55 @@ +{{- /* +Generated from 'kubernetes-system-controller-manager' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeControllerManager }} +{{- if (include "exporter.kubeControllerManager.enabled" .)}} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-controller-manager" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-controller-manager + rules: +{{- if not (.Values.defaultRules.disabled.KubeControllerManagerDown | default false) }}{{/* Fires when no controller-manager target is up; "for" and "severity" are overridable via customRules.KubeControllerManagerDown. */}} + - alert: KubeControllerManagerDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager | indent 8 }} +{{- end }} + description: KubeControllerManager has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontrollermanagerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ include "exporter.kubeControllerManager.jobName" . }}"} == 1) + for: {{ dig "KubeControllerManagerDown" "for" "15m" .Values.customRules }} + labels: + severity: {{ dig "KubeControllerManagerDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} + diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml new file mode 100644 index 0000000000..66b1d62001 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'kubernetes-system-kube-proxy' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/main/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeProxy }} +{{- if (include "exporter.kubeProxy.enabled" .)}} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-kube-proxy" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-kube-proxy + rules: +{{- if not (.Values.defaultRules.disabled.KubeProxyDown | default false) }}{{/* Fires when no kube-proxy target is up. customRules.KubeProxyDown.severity wins over the legacy labelsSeverity key, which is still honoured as a fallback. */}} + - alert: KubeProxyDown + annotations: + {{- if or .Values.defaultRules.additionalRuleAnnotations .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy }} + {{- with .Values.defaultRules.additionalRuleAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + description: KubeProxy has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeproxydown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ include "exporter.kubeProxy.jobName" . }}"} == 1) + for: {{ dig "KubeProxyDown" "for" "15m" .Values.customRules }} + labels: + severity: {{ dig "KubeProxyDown" "severity" (dig "KubeProxyDown" "labelsSeverity" "critical" .Values.customRules) .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml new file mode 100644 index 0000000000..2a55676735 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml @@ -0,0 +1,379 @@ +{{- /* +Generated from 'kubernetes-system-kubelet' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-kubelet" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-kubelet + rules: +{{- if not (.Values.defaultRules.disabled.KubeNodeNotReady | default false) }} + - alert: KubeNodeNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodenotready + summary: Node is not ready. + expr: kube_node_status_condition{job="{{ $kubeStateMetricsJob }}",condition="Ready",status="true"} == 0 + for: {{ dig "KubeNodeNotReady" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodeUnreachable | default false) }} + - alert: KubeNodeUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodeunreachable + summary: Node is unreachable. + expr: (kube_node_spec_taint{job="{{ $kubeStateMetricsJob }}",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="{{ $kubeStateMetricsJob }}",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 + for: {{ dig "KubeNodeUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletTooManyPods | default false) }} + - alert: KubeletTooManyPods + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubelettoomanypods + summary: Kubelet is running at capacity. + expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + (kube_pod_status_phase{job="{{ $kubeStateMetricsJob }}",phase="Running"} == 1) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance,pod,namespace,cluster) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance,pod,namespace,cluster) (1, kube_pod_info{job="{{ $kubeStateMetricsJob }}"}) + ) + / + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + kube_node_status_capacity{job="{{ $kubeStateMetricsJob }}",resource="pods"} != 1 + ) > 0.95 + for: {{ dig "KubeletTooManyPods" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletTooManyPods" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodeReadinessFlapping | default false) }} + - alert: KubeNodeReadinessFlapping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodereadinessflapping + summary: Node readiness status is flapping. + expr: sum(changes(kube_node_status_condition{job="{{ $kubeStateMetricsJob }}",status="true",condition="Ready"}[15m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) > 2 + for: {{ dig "KubeNodeReadinessFlapping" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeReadinessFlapping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletPlegDurationHigh | default false) }} + - alert: KubeletPlegDurationHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletplegdurationhigh + summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. + expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + for: {{ dig "KubeletPlegDurationHigh" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletPlegDurationHigh" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletPodStartUpLatencyHigh | default false) }} + - alert: KubeletPodStartUpLatencyHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh + summary: Kubelet Pod startup latency is too high. + expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics"} > 60 + for: {{ dig "KubeletPodStartUpLatencyHigh" "for" "15m" .Values.customRules }} + labels: + severity: {{ dig "KubeletPodStartUpLatencyHigh" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateExpiration | default false) }} + - alert: KubeletClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < 604800 + labels: + severity: {{ dig "KubeletClientCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateExpiration | default false) }} + - alert: KubeletClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < 86400 + labels: + severity: {{ dig "KubeletClientCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateExpiration | default false) }} + - alert: KubeletServerCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 604800 + labels: + severity: {{ dig "KubeletServerCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateExpiration | default false) }} + - alert: KubeletServerCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 86400 + labels: + severity: {{ dig "KubeletServerCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateRenewalErrors | default false) }} + - alert: KubeletClientCertificateRenewalErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes). + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificaterenewalerrors + summary: Kubelet has failed to renew its client certificate. + expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 + for: {{ dig "KubeletClientCertificateRenewalErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletClientCertificateRenewalErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateRenewalErrors | default false) }} + - alert: KubeletServerCertificateRenewalErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes). + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificaterenewalerrors + summary: Kubelet has failed to renew its server certificate. + expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 + for: {{ dig "KubeletServerCertificateRenewalErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletServerCertificateRenewalErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if (include "exporter.kubelet.enabled" .)}} +{{- if not (.Values.defaultRules.disabled.KubeletDown | default false) }} + - alert: KubeletDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics"} == 1) + for: {{ dig "KubeletDown" "for" "15m" .Values.customRules }} + labels: + severity: {{ dig "KubeletDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml new file mode 100644 index 0000000000..9890b1c959 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml @@ -0,0 +1,54 @@ +{{- /* +Generated from 'kubernetes-system-scheduler' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeSchedulerAlerting }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-scheduler" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-scheduler + rules: +{{- if .Values.kubeScheduler.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeSchedulerDown | default false) }} + - alert: KubeSchedulerDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting | indent 8 }} +{{- end }} + description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeschedulerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ include "exporter.kubeScheduler.jobName" . }}"} == 1) + for: {{ dig "KubeSchedulerDown" "for" "15m" .Values.customRules }} + labels: + severity: {{ dig "KubeSchedulerDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system.yaml new file mode 100644 index 0000000000..621326d0ad --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/kubernetes-system.yaml @@ -0,0 +1,87 @@ +{{- /* +Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system + rules: +{{- if not (.Values.defaultRules.disabled.KubeVersionMismatch | default false) }} + - alert: KubeVersionMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeversionmismatch + summary: Different semantic versions of Kubernetes components running. + expr: count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 + for: {{ dig "KubeVersionMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeVersionMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeClientErrors | default false) }} + - alert: KubeClientErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclienterrors + summary: Kubernetes API server client is experiencing errors. + expr: |- + (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace) + / + sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace)) + > 0.01 + for: {{ dig "KubeClientErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.rules.yaml new file mode 100644 index 0000000000..5d4711ae08 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.rules.yaml @@ -0,0 +1,188 @@ +{{- /* +Generated from 'node-exporter.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/nodeExporter-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.nodeExporterRecording }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-exporter.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-exporter.rules + rules: + - expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_drop_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.yaml new file mode 100644 index 0000000000..14738cedfa --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-exporter.yaml @@ -0,0 +1,801 @@ +{{- /* +Generated from 'node-exporter' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/nodeExporter-prometheusRule.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.nodeExporterAlerting }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-exporter" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-exporter + rules: +{{- if not (.Values.defaultRules.disabled.NodeFilesystemSpaceFillingUp | default false) }} + - alert: NodeFilesystemSpaceFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemSpaceFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemSpaceFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemSpaceFillingUp | default false) }} + - alert: NodeFilesystemSpaceFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 4 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemSpaceFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemSpaceFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfSpace | default false) }} + - alert: NodeFilesystemAlmostOutOfSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 5% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfSpace" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfSpace" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfSpace | default false) }} + - alert: NodeFilesystemAlmostOutOfSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 3% space left. 
+ expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfSpace" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfSpace" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemFilesFillingUp | default false) }} + - alert: NodeFilesystemFilesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 24 hours. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemFilesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemFilesFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemFilesFillingUp | default false) }} + - alert: NodeFilesystemFilesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 4 hours. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemFilesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemFilesFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfFiles | default false) }} + - alert: NodeFilesystemAlmostOutOfFiles + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 5% inodes left. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfFiles" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfFiles" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfFiles | default false) }} + - alert: NodeFilesystemAlmostOutOfFiles + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 3% inodes left. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfFiles" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfFiles" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeNetworkReceiveErrs | default false) }} + - alert: NodeNetworkReceiveErrs + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodenetworkreceiveerrs + summary: Network interface is reporting many receive errors. 
+ expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 + for: {{ dig "NodeNetworkReceiveErrs" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkReceiveErrs" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeNetworkTransmitErrs | default false) }} + - alert: NodeNetworkTransmitErrs + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodenetworktransmiterrs + summary: Network interface is reporting many transmit errors. + expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 + for: {{ dig "NodeNetworkTransmitErrs" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkTransmitErrs" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeHighNumberConntrackEntriesUsed | default false) }} + - alert: NodeHighNumberConntrackEntriesUsed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodehighnumberconntrackentriesused + summary: Number of conntrack are getting close to the limit. + expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 + labels: + severity: {{ dig "NodeHighNumberConntrackEntriesUsed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeTextFileCollectorScrapeError | default false) }} + - alert: NodeTextFileCollectorScrapeError + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Node Exporter text file collector on {{`{{`}} $labels.instance {{`}}`}} failed to scrape. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: {{ dig "NodeTextFileCollectorScrapeError" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeClockSkewDetected | default false) }} + - alert: NodeClockSkewDetected + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Clock at {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodeclockskewdetected + summary: Clock skew detected. + expr: |- + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: {{ dig "NodeClockSkewDetected" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeClockSkewDetected" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeClockNotSynchronising | default false) }} + - alert: NodeClockNotSynchronising + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Clock at {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodeclocknotsynchronising + summary: Clock not synchronising. 
+ expr: |- + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: {{ dig "NodeClockNotSynchronising" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeClockNotSynchronising" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeRAIDDegraded | default false) }} + - alert: NodeRAIDDegraded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: RAID array '{{`{{`}} $labels.device {{`}}`}}' at {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddegraded + summary: RAID Array is degraded. 
+ expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 + for: {{ dig "NodeRAIDDegraded" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeRAIDDegraded" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeRAIDDiskFailure | default false) }} + - alert: NodeRAIDDiskFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: At least one device in RAID array at {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddiskfailure + summary: Failed device in RAID array. 
+ expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 + labels: + severity: {{ dig "NodeRAIDDiskFailure" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFileDescriptorLimit | default false) }} + - alert: NodeFileDescriptorLimit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: {{ dig "NodeFileDescriptorLimit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFileDescriptorLimit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFileDescriptorLimit | default false) }} + - alert: NodeFileDescriptorLimit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: {{ dig "NodeFileDescriptorLimit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFileDescriptorLimit" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeCPUHighUsage | default false) }} + - alert: NodeCPUHighUsage + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'CPU usage at {{`{{`}} $labels.instance {{`}}`}} has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodecpuhighusage + summary: High CPU usage. + expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 + for: {{ dig "NodeCPUHighUsage" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeCPUHighUsage" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeSystemSaturation | default false) }} + - alert: NodeSystemSaturation + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'System load per core at {{`{{`}} $labels.instance {{`}}`}} has been above 2 for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This might indicate this instance resources saturation and can cause it becoming unresponsive. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodesystemsaturation + summary: System saturated, load per core is very high. + expr: |- + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: {{ dig "NodeSystemSaturation" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeSystemSaturation" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeMemoryMajorPagesFaults | default false) }} + - alert: NodeMemoryMajorPagesFaults + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Memory major pages are occurring at very high rate at {{`{{`}} $labels.instance {{`}}`}}, 500 major page faults per second for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + Please check that there is enough memory available at this instance. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodememorymajorpagesfaults + summary: Memory major page faults are occurring at very high rate. + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: {{ dig "NodeMemoryMajorPagesFaults" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeMemoryMajorPagesFaults" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeMemoryHighUtilization | default false) }} + - alert: NodeMemoryHighUtilization + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Memory is filling up at {{`{{`}} $labels.instance {{`}}`}}, has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodememoryhighutilization + summary: Host is running out of memory. + expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 + for: {{ dig "NodeMemoryHighUtilization" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeMemoryHighUtilization" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeDiskIOSaturation | default false) }} + - alert: NodeDiskIOSaturation + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Disk IO queue (aqu-sq) is high on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}}, has been above 10 for the last 30 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This symptom might indicate disk saturation. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodediskiosaturation + summary: Disk IO queue is high. + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 + for: {{ dig "NodeDiskIOSaturation" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeDiskIOSaturation" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeSystemdServiceFailed | default false) }} + - alert: NodeSystemdServiceFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Systemd service {{`{{`}} $labels.name {{`}}`}} has entered failed state at {{`{{`}} $labels.instance {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodesystemdservicefailed + summary: Systemd service has entered failed state. + expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: {{ dig "NodeSystemdServiceFailed" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeSystemdServiceFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeBondingDegraded | default false) }} + - alert: NodeBondingDegraded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Bonding interface {{`{{`}} $labels.master {{`}}`}} on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more slave failures. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodebondingdegraded + summary: Bonding interface is degraded + expr: (node_bonding_slaves - node_bonding_active) != 0 + for: {{ dig "NodeBondingDegraded" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeBondingDegraded" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-network.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-network.yaml new file mode 100644 index 0000000000..8dc60ef66b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node-network.yaml @@ -0,0 +1,55 @@ +{{- /* +Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubePrometheus-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-network + rules: +{{- if not (.Values.defaultRules.disabled.NodeNetworkInterfaceFlapping | default false) }} + - alert: NodeNetworkInterfaceFlapping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.network }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.network | indent 8 }} +{{- end }} + description: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/nodenetworkinterfaceflapping + summary: Network interface is often changing its status + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: {{ dig "NodeNetworkInterfaceFlapping" "for" "2m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkInterfaceFlapping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.network }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.network }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node.rules.yaml new file mode 100644 index 0000000000..e2911b905e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/node.rules.yaml @@ -0,0 +1,109 @@ +{{- /* +Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/kubernetesControlPlane-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node.rules + rules: + - expr: |- + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node, namespace, pod) ( + label_replace(kube_pod_info{job="{{ $kubeStateMetricsJob }}",node!=""}, "pod", "$1", "pod", "(.*)") + )) + record: 'node_namespace_pod:kube_pod_info:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:) + ) + record: node:node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster) + record: :node_memory_MemAvailable_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + record: node:node_cpu_utilization:ratio_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + node:node_cpu_utilization:ratio_rate5m + ) + record: cluster:node_cpu:ratio_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus-operator.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus-operator.yaml new file mode 100644 index 0000000000..1f288dffe3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus-operator.yaml @@ -0,0 +1,253 @@ +{{- /* +Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/prometheusOperator-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }} +{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus-operator + rules: +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorListErrors | default false) }} + - alert: PrometheusOperatorListErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while performing List operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorlisterrors + summary: Errors while performing list operations in controller. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m]))) > 0.4 + for: {{ dig "PrometheusOperatorListErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorListErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorWatchErrors | default false) }} + - alert: PrometheusOperatorWatchErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorwatcherrors + summary: Errors while performing watch operations in controller. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.4 + for: {{ dig "PrometheusOperatorWatchErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorWatchErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorSyncFailed | default false) }} + - alert: PrometheusOperatorSyncFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Controller {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} namespace fails to reconcile {{`{{`}} $value {{`}}`}} objects. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorsyncfailed + summary: Last controller reconciliation failed + expr: min_over_time(prometheus_operator_syncs{status="failed",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOperatorSyncFailed" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorSyncFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorReconcileErrors | default false) }} + - alert: PrometheusOperatorReconcileErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of reconciling operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorreconcileerrors + summary: Errors while reconciling objects. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1 + for: {{ dig "PrometheusOperatorReconcileErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorReconcileErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorStatusUpdateErrors | default false) }} + - alert: PrometheusOperatorStatusUpdateErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of status update operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorstatusupdateerrors + summary: Errors while updating objects status. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1 + for: {{ dig "PrometheusOperatorStatusUpdateErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorStatusUpdateErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNodeLookupErrors | default false) }} + - alert: PrometheusOperatorNodeLookupErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornodelookuperrors + summary: Errors while reconciling Prometheus. + expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 + for: {{ dig "PrometheusOperatorNodeLookupErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorNodeLookupErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNotReady | default false) }} + - alert: PrometheusOperatorNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornotready + summary: Prometheus operator not ready + expr: min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) == 0) + for: {{ dig "PrometheusOperatorNotReady" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorRejectedResources | default false) }} + - alert: PrometheusOperatorRejectedResources + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace rejected {{`{{`}} printf "%0.0f" $value {{`}}`}} {{`{{`}} $labels.controller {{`}}`}}/{{`{{`}} $labels.resource {{`}}`}} resources. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorrejectedresources + summary: Resources rejected by Prometheus operator + expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOperatorRejectedResources" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorRejectedResources" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus.yaml new file mode 100644 index 0000000000..9dfeb1f9db --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/prometheus.yaml @@ -0,0 +1,707 @@ +{{- /* +Generated from 'prometheus' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a8ba97a150c75be42010c75d10b720c55e182f1a/manifests/prometheus-prometheusRule.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }} +{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus + rules: +{{- if not (.Values.defaultRules.disabled.PrometheusBadConfig | default false) }} + - alert: PrometheusBadConfig + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusbadconfig + summary: Failed Prometheus configuration reload. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) == 0 + for: {{ dig "PrometheusBadConfig" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusBadConfig" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusSDRefreshFailure: skipped when .Values.defaultRules.disabled.PrometheusSDRefreshFailure is truthy; "for" (default 20m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusSDRefreshFailure. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusSDRefreshFailure | default false) }} + - alert: PrometheusSDRefreshFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to refresh SD with mechanism {{`{{`}}$labels.mechanism{{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheussdrefreshfailure + summary: Failed Prometheus SD refresh. + expr: increase(prometheus_sd_refresh_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[10m]) > 0 + for: {{ dig "PrometheusSDRefreshFailure" "for" "20m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusSDRefreshFailure" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusNotificationQueueRunningFull: skipped when .Values.defaultRules.disabled.PrometheusNotificationQueueRunningFull is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusNotificationQueueRunningFull. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusNotificationQueueRunningFull | default false) }} + - alert: PrometheusNotificationQueueRunningFull + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotificationqueuerunningfull + summary: Prometheus alert notification queue predicted to run full in less than 30m. + expr: |- + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + for: {{ dig "PrometheusNotificationQueueRunningFull" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotificationQueueRunningFull" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusErrorSendingAlertsToSomeAlertmanagers: skipped when .Values.defaultRules.disabled.PrometheusErrorSendingAlertsToSomeAlertmanagers is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusErrorSendingAlertsToSomeAlertmanagers. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToSomeAlertmanagers | default false) }} + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstosomealertmanagers + summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. + expr: |- + ( + rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + / + rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + * 100 + > 1 + for: {{ dig "PrometheusErrorSendingAlertsToSomeAlertmanagers" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusErrorSendingAlertsToSomeAlertmanagers" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusNotConnectedToAlertmanagers: skipped when .Values.defaultRules.disabled.PrometheusNotConnectedToAlertmanagers is truthy; "for" (default 10m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusNotConnectedToAlertmanagers. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusNotConnectedToAlertmanagers | default false) }} + - alert: PrometheusNotConnectedToAlertmanagers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotconnectedtoalertmanagers + summary: Prometheus is not connected to any Alertmanagers. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) < 1 + for: {{ dig "PrometheusNotConnectedToAlertmanagers" "for" "10m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotConnectedToAlertmanagers" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusTSDBReloadsFailing: skipped when .Values.defaultRules.disabled.PrometheusTSDBReloadsFailing is truthy; "for" (default 4h) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusTSDBReloadsFailing. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusTSDBReloadsFailing | default false) }} + - alert: PrometheusTSDBReloadsFailing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbreloadsfailing + summary: Prometheus has issues reloading blocks from disk. + expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 + for: {{ dig "PrometheusTSDBReloadsFailing" "for" "4h" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTSDBReloadsFailing" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusTSDBCompactionsFailing: skipped when .Values.defaultRules.disabled.PrometheusTSDBCompactionsFailing is truthy; "for" (default 4h) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusTSDBCompactionsFailing. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusTSDBCompactionsFailing | default false) }} + - alert: PrometheusTSDBCompactionsFailing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbcompactionsfailing + summary: Prometheus has issues compacting blocks. + expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 + for: {{ dig "PrometheusTSDBCompactionsFailing" "for" "4h" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTSDBCompactionsFailing" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusNotIngestingSamples: skipped when .Values.defaultRules.disabled.PrometheusNotIngestingSamples is truthy; "for" (default 10m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusNotIngestingSamples. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusNotIngestingSamples | default false) }} + - alert: PrometheusNotIngestingSamples + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotingestingsamples + summary: Prometheus is not ingesting samples. + expr: |- + ( + sum without(type) (rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0 + ) + ) + for: {{ dig "PrometheusNotIngestingSamples" "for" "10m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotIngestingSamples" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusDuplicateTimestamps: skipped when .Values.defaultRules.disabled.PrometheusDuplicateTimestamps is truthy; "for" (default 10m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusDuplicateTimestamps. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusDuplicateTimestamps | default false) }} + - alert: PrometheusDuplicateTimestamps + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusduplicatetimestamps + summary: Prometheus is dropping samples with duplicate timestamps. + expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusDuplicateTimestamps" "for" "10m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusDuplicateTimestamps" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusOutOfOrderTimestamps: skipped when .Values.defaultRules.disabled.PrometheusOutOfOrderTimestamps is truthy; "for" (default 10m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusOutOfOrderTimestamps. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusOutOfOrderTimestamps | default false) }} + - alert: PrometheusOutOfOrderTimestamps + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusoutofordertimestamps + summary: Prometheus drops samples with out-of-order timestamps. + expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOutOfOrderTimestamps" "for" "10m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOutOfOrderTimestamps" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusRemoteStorageFailures: skipped when .Values.defaultRules.disabled.PrometheusRemoteStorageFailures is truthy; "for" (default 15m) and "severity" (default critical) may be overridden via .Values.customRules.PrometheusRemoteStorageFailures. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusRemoteStorageFailures | default false) }} + - alert: PrometheusRemoteStorageFailures + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotestoragefailures + summary: Prometheus fails to send samples to remote storage. + expr: |- + ( + (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + ) + ) + * 100 + > 1 + for: {{ dig "PrometheusRemoteStorageFailures" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteStorageFailures" "severity" "critical" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusRemoteWriteBehind: skipped when .Values.defaultRules.disabled.PrometheusRemoteWriteBehind is truthy; "for" (default 15m) and "severity" (default critical) may be overridden via .Values.customRules.PrometheusRemoteWriteBehind. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteBehind | default false) }} + - alert: PrometheusRemoteWriteBehind + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritebehind + summary: Prometheus remote write is behind. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + - ignoring(remote_name, url) group_right + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + > 120 + for: {{ dig "PrometheusRemoteWriteBehind" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteWriteBehind" "severity" "critical" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusRemoteWriteDesiredShards: skipped when .Values.defaultRules.disabled.PrometheusRemoteWriteDesiredShards is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusRemoteWriteDesiredShards. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteDesiredShards | default false) }} + - alert: PrometheusRemoteWriteDesiredShards + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritedesiredshards + summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. 
+ ( + max_over_time(prometheus_remote_storage_shards_desired{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + for: {{ dig "PrometheusRemoteWriteDesiredShards" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteWriteDesiredShards" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusRuleFailures: skipped when .Values.defaultRules.disabled.PrometheusRuleFailures is truthy; "for" (default 15m) and "severity" (default critical) may be overridden via .Values.customRules.PrometheusRuleFailures. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusRuleFailures | default false) }} + - alert: PrometheusRuleFailures + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusrulefailures + summary: Prometheus is failing rule evaluations. + expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusRuleFailures" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . 
}}" + {{- end }} + labels: + severity: {{ dig "PrometheusRuleFailures" "severity" "critical" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusMissingRuleEvaluations: skipped when .Values.defaultRules.disabled.PrometheusMissingRuleEvaluations is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusMissingRuleEvaluations. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusMissingRuleEvaluations | default false) }} + - alert: PrometheusMissingRuleEvaluations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusmissingruleevaluations + summary: Prometheus is missing rule evaluations due to slow rule group evaluation. + expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusMissingRuleEvaluations" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusMissingRuleEvaluations" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusTargetLimitHit: skipped when .Values.defaultRules.disabled.PrometheusTargetLimitHit is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusTargetLimitHit. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusTargetLimitHit | default false) }} + - alert: PrometheusTargetLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetlimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. + expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusTargetLimitHit" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTargetLimitHit" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusLabelLimitHit: skipped when .Values.defaultRules.disabled.PrometheusLabelLimitHit is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusLabelLimitHit. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusLabelLimitHit | default false) }} + - alert: PrometheusLabelLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuslabellimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. + expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusLabelLimitHit" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusLabelLimitHit" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusScrapeBodySizeLimitHit: skipped when .Values.defaultRules.disabled.PrometheusScrapeBodySizeLimitHit is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusScrapeBodySizeLimitHit. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusScrapeBodySizeLimitHit | default false) }} + - alert: PrometheusScrapeBodySizeLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured body_size_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapebodysizelimithit + summary: Prometheus has dropped some targets that exceeded body size limit. + expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusScrapeBodySizeLimitHit" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusScrapeBodySizeLimitHit" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusScrapeSampleLimitHit: skipped when .Values.defaultRules.disabled.PrometheusScrapeSampleLimitHit is truthy; "for" (default 15m) and "severity" (default warning) may be overridden via .Values.customRules.PrometheusScrapeSampleLimitHit. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusScrapeSampleLimitHit | default false) }} + - alert: PrometheusScrapeSampleLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured sample_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapesamplelimithit + summary: Prometheus has failed scrapes that have exceeded the configured sample limit. + expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusScrapeSampleLimitHit" "for" "15m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusScrapeSampleLimitHit" "severity" "warning" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- /* Alert PrometheusTargetSyncFailure: skipped when .Values.defaultRules.disabled.PrometheusTargetSyncFailure is truthy; "for" (default 5m) and "severity" (default critical) may be overridden via .Values.customRules.PrometheusTargetSyncFailure. */ -}}{{- if not (.Values.defaultRules.disabled.PrometheusTargetSyncFailure | default false) }} + - alert: PrometheusTargetSyncFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetsyncfailure + summary: Prometheus has failed to sync targets. + expr: increase(prometheus_target_sync_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[30m]) > 0 + for: {{ dig "PrometheusTargetSyncFailure" "for" "5m" .Values.customRules | quote }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTargetSyncFailure" "severity" "critical" .Values.customRules | quote }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusHighQueryLoad | default false) }} + - alert: PrometheusHighQueryLoad + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} query API has less than 20% available capacity in its query engine for the last 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheushighqueryload + summary: Prometheus is reaching its maximum capacity serving concurrent requests. + expr: avg_over_time(prometheus_engine_queries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.8 + for: {{ dig "PrometheusHighQueryLoad" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusHighQueryLoad" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToAnyAlertmanager | default false) }} + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstoanyalertmanager + summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. + expr: |- + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m]) + ) + * 100 + > 3 + for: {{ dig "PrometheusErrorSendingAlertsToAnyAlertmanager" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusErrorSendingAlertsToAnyAlertmanager" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.node.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.node.rules.yaml new file mode 100644 index 0000000000..7c25553861 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.node.rules.yaml @@ -0,0 +1,301 @@ +{{- /* +Generated from 'windows.node.rules' group from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.windowsMonitoring.enabled .Values.defaultRules.rules.windows }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "windows.node.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: windows.node.rules + rules: + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster) ( + windows_system_system_up_time{job="windows-exporter"} + ) + record: node:windows_node:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, core) ( + windows_cpu_time_total{job="windows-exporter"} + )) + record: node:windows_node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: 1 - avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m])) + record: :windows_node_cpu_utilisation:avg1m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, instance) ( + rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m]) + ) + record: node:windows_node_cpu_utilisation:avg1m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_memory_available_bytes{job="windows-exporter"}) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_os_visible_memory_bytes{job="windows-exporter"}) + record: ':windows_node_memory_utilisation:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_memory_available_bytes{job="windows-exporter"} + windows_memory_cache_bytes{job="windows-exporter"}) + record: :windows_node_memory_MemFreeCached_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: (windows_memory_cache_bytes{job="windows-exporter"} + windows_memory_modified_page_list_bytes{job="windows-exporter"} + windows_memory_standby_cache_core_bytes{job="windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="windows-exporter"}) + record: node:windows_node_memory_totalCached_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_os_visible_memory_bytes{job="windows-exporter"}) + record: :windows_node_memory_MemTotal_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (windows_memory_available_bytes{job="windows-exporter"}) + ) + record: node:windows_node_memory_bytes_available:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + windows_os_visible_memory_bytes{job="windows-exporter"} + ) + record: node:windows_node_memory_bytes_total:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + (node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum) + / + scalar(sum(node:windows_node_memory_bytes_total:sum)) + record: node:windows_node_memory_utilisation:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: 1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum) + record: 'node:windows_node_memory_utilisation:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: irate(windows_memory_swap_page_operations_total{job="windows-exporter"}[5m]) + record: node:windows_node_memory_swap_io_pages:irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m]) + ) + record: :windows_node_disk_utilisation:avg_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_disk_utilisation:avg_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster,instance,volume)( + (windows_logical_disk_size_bytes{job="windows-exporter"} + - windows_logical_disk_free_bytes{job="windows-exporter"}) + / windows_logical_disk_size_bytes{job="windows-exporter"} + ) + record: 'node:windows_node_filesystem_usage:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, volume) (windows_logical_disk_free_bytes{job="windows-exporter"} / windows_logical_disk_size_bytes{job="windows-exporter"}) + record: 'node:windows_node_filesystem_avail:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_bytes_total{job="windows-exporter"}[1m])) + record: :windows_node_net_utilisation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}cluster, instance) ( + (irate(windows_net_bytes_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_net_utilisation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m])) + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) + record: :windows_node_net_saturation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m]) + + irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_net_saturation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.pod.rules.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.pod.rules.yaml new file mode 100644 index 0000000000..86340b5c05 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/rules-1.14/windows.pod.rules.yaml @@ -0,0 +1,158 @@ +{{- /* +Generated from 'windows.pod.rules' group from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.windowsMonitoring.enabled .Values.defaultRules.rules.windows }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "windows.pod.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: windows.pod.rules + rules: + - expr: windows_container_available{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_pod_container_available + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_cpu_usage_seconds_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_total_runtime + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_memory_usage_commit_bytes{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_memory_usage + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_memory_usage_private_working_set_bytes{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_private_working_set_usage + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_network_receive_bytes_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_network_received_bytes_total + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_network_transmit_bytes_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_network_transmitted_bytes_total + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . 
}},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_memory_request + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_memory_limit + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_cpu_cores_request + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + - expr: kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_cpu_cores_limit + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + rate(windows_container_total_runtime{}[5m]) + ) + record: namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/secret.yaml new file mode 100644 index 0000000000..e4a1e73c7b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/secret.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.thanos .Values.prometheus.prometheusSpec.thanos.objectStorageConfig}} +{{- if and .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.secret (not .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + object-storage-configs.yaml: {{ toYaml .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.secret | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/service.yaml new file mode 100644 index 0000000000..d61b9d6ef3 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/service.yaml @@ -0,0 +1,80 @@ +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if .Values.prometheus.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . 
}}-prometheus + self-monitor: {{ .Values.prometheus.serviceMonitor.selfMonitor | quote }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.service.labels }} +{{ toYaml .Values.prometheus.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.service.annotations }} + annotations: +{{ toYaml .Values.prometheus.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheus.service.clusterIP }} + clusterIP: {{ .Values.prometheus.service.clusterIP }} +{{- end }} +{{- if .Values.prometheus.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheus.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheus.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheus.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheus.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheus.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheus.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheus.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.prometheusSpec.portName }} + {{- if eq .Values.prometheus.service.type "NodePort" }} + nodePort: {{ .Values.prometheus.service.nodePort }} + {{- end }} + port: {{ .Values.prometheus.service.port }} + targetPort: {{ .Values.prometheus.service.targetPort }} + - name: reloader-web + {{- if semverCompare "> 1.20.0-0" $kubeTargetVersion }} + appProtocol: http + {{- end }} + port: {{ .Values.prometheus.service.reloaderWebPort }} + targetPort: reloader-web + {{- if .Values.prometheus.thanosIngress.enabled }} + - name: grpc + {{- if eq .Values.prometheus.service.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosIngress.nodePort }} + {{- end }} + port: {{ .Values.prometheus.thanosIngress.servicePort }} + targetPort: {{ .Values.prometheus.thanosIngress.servicePort }} + {{- end }} +{{- if 
.Values.prometheus.service.additionalPorts }} +{{ toYaml .Values.prometheus.service.additionalPorts | indent 2 }} +{{- end }} + publishNotReadyAddresses: {{ .Values.prometheus.service.publishNotReadyAddresses }} + selector: + {{- if .Values.prometheus.agentMode }}{{/* The app.kubernetes.io/name selector value switches to prometheus-agent when agentMode is set. */}} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- if .Values.prometheus.service.sessionAffinity }} + sessionAffinity: {{ .Values.prometheus.service.sessionAffinity }} +{{- end }} +{{- if eq .Values.prometheus.service.sessionAffinity "ClientIP" }}{{/* sessionAffinityConfig is rendered only for ClientIP affinity. */}} + sessionAffinityConfig: + clientIP: + timeoutSeconds: {{ .Values.prometheus.service.sessionAffinityConfig.clientIP.timeoutSeconds }} +{{- end }} + type: "{{ .Values.prometheus.service.type }}" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecar.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecar.yaml new file mode 100644 index 0000000000..15b89c8c23 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecar.yaml @@ -0,0 +1,39 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosService.enabled }}{{/* Thanos discovery Service; rendered only when both Prometheus and thanosService are enabled. */}} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-discovery + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-discovery +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.prometheus.thanosService.labels }} +{{ toYaml .Values.prometheus.thanosService.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.thanosService.annotations }} + annotations: +{{ toYaml .Values.prometheus.thanosService.annotations | indent 4 }} +{{- end }} +spec: + type: {{ .Values.prometheus.thanosService.type }} + clusterIP: {{ .Values.prometheus.thanosService.clusterIP }} +{{- if ne .Values.prometheus.thanosService.type "ClusterIP" }}{{/* externalTrafficPolicy is emitted for every Service type other than ClusterIP. */}} + externalTrafficPolicy: {{ .Values.prometheus.thanosService.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.thanosService.portName }} + port: {{ .Values.prometheus.thanosService.port }} + targetPort: {{ .Values.prometheus.thanosService.targetPort }} + {{- if eq .Values.prometheus.thanosService.type "NodePort" }}{{/* nodePort values are rendered only for NodePort Services. */}} + nodePort: {{ .Values.prometheus.thanosService.nodePort }} + {{- end }} + - name: {{ .Values.prometheus.thanosService.httpPortName }} + port: {{ .Values.prometheus.thanosService.httpPort }} + targetPort: {{ .Values.prometheus.thanosService.targetHttpPort }} + {{- if eq .Values.prometheus.thanosService.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosService.httpNodePort }} + {{- end }} + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecarExternal.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecarExternal.yaml new file mode 100644 index 0000000000..453eed7f1b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceThanosSidecarExternal.yaml @@ -0,0 +1,46 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosServiceExternal.enabled }}{{/* External Thanos Service; rendered only when both Prometheus and thanosServiceExternal are enabled. */}} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . 
}}-thanos-external + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.thanosServiceExternal.labels }} +{{ toYaml .Values.prometheus.thanosServiceExternal.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.thanosServiceExternal.annotations }} + annotations: +{{ toYaml .Values.prometheus.thanosServiceExternal.annotations | indent 4 }} +{{- end }} +spec: + type: {{ .Values.prometheus.thanosServiceExternal.type }} +{{- if .Values.prometheus.thanosServiceExternal.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheus.thanosServiceExternal.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheus.thanosServiceExternal.loadBalancerSourceRanges }}{{/* Each configured CIDR becomes one loadBalancerSourceRanges entry. */}} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheus.thanosServiceExternal.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheus.thanosServiceExternal.type "ClusterIP" }}{{/* externalTrafficPolicy is emitted for every Service type other than ClusterIP. */}} + externalTrafficPolicy: {{ .Values.prometheus.thanosServiceExternal.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.thanosServiceExternal.portName }} + port: {{ .Values.prometheus.thanosServiceExternal.port }} + targetPort: {{ .Values.prometheus.thanosServiceExternal.targetPort }} + {{- if eq .Values.prometheus.thanosServiceExternal.type "NodePort" }}{{/* nodePort values are rendered only for NodePort Services. */}} + nodePort: {{ .Values.prometheus.thanosServiceExternal.nodePort }} + {{- end }} + - name: {{ .Values.prometheus.thanosServiceExternal.httpPortName }} + port: {{ .Values.prometheus.thanosServiceExternal.httpPort }} + targetPort: {{ .Values.prometheus.thanosServiceExternal.targetHttpPort }} + {{- if eq .Values.prometheus.thanosServiceExternal.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosServiceExternal.httpNodePort }} + {{- end }} + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . 
}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceaccount.yaml new file mode 100644 index 0000000000..e97b989bbd --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceaccount.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.serviceAccount.create }}{{/* Prometheus ServiceAccount; created only when Prometheus is enabled and serviceAccount.create is set. */}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.prometheus.serviceAccount.annotations | indent 4 }} +{{- end }} +automountServiceAccountToken: {{ .Values.prometheus.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }}{{/* The chart-wide imagePullSecrets helper output is attached only when global.imagePullSecrets is set. */}} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitor.yaml new file mode 100644 index 0000000000..a36f3e33ca --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitor.yaml @@ -0,0 +1,97 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.serviceMonitor.selfMonitor }}{{/* Self-monitoring ServiceMonitor; rendered only when Prometheus and serviceMonitor.selfMonitor are enabled. */}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.prometheus.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + release: {{ $.Release.Name | quote }} + self-monitor: "true" + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.prometheus.prometheusSpec.portName }} + {{- if .Values.prometheus.serviceMonitor.interval }} + interval: {{ .Values.prometheus.serviceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.scheme }} + scheme: {{ .Values.prometheus.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.serviceMonitor.bearerTokenFile }} + {{- end }} + path: "{{ trimSuffix "/" .Values.prometheus.prometheusSpec.routePrefix }}/metrics" + metricRelabelings: + {{- if .Values.prometheus.serviceMonitor.metricRelabelings }}{{/* User-supplied metricRelabelings are passed through tpl, so they may contain template expressions; the key itself is always emitted because cluster labels may follow. */}} + {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . 
}} + {{- end }} + {{- if .Values.global.cattle.clusterId }}{{/* Adds a cluster_id label carrying global.cattle.clusterId; the value is quoted so it always renders as a YAML string. */}} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{- if .Values.global.cattle.clusterName}}{{/* Adds a cluster_name label carrying global.cattle.clusterName; quoted because a name such as 123 or on would otherwise render as a non-string. */}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + - port: reloader-web + {{- if .Values.prometheus.serviceMonitor.interval }} + interval: {{ .Values.prometheus.serviceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.scheme }} + scheme: {{ .Values.prometheus.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "/metrics" + {{- if .Values.prometheus.serviceMonitor.metricRelabelings }}{{/* The reloader-web endpoint reuses the serviceMonitor-level metricRelabelings, rendered through tpl. */}} + metricRelabelings: {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . 
}} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.prometheus.serviceMonitor.additionalEndpoints }}{{/* Inside this range "." is one additionalEndpoints item; each field falls back to the serviceMonitor-level value through default, and "$" is the chart root. */}} + - port: {{ .port }} + {{- if or $.Values.prometheus.serviceMonitor.interval .interval }} + interval: {{ default $.Values.prometheus.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.prometheus.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- if or $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings }}{{/* tpl is given the root context "$" so templated relabelings resolve .Values exactly as they do for the two built-in endpoints. */}} + metricRelabelings: {{- tpl (default $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) $ 
}} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.prometheus.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitorThanosSidecar.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitorThanosSidecar.yaml new file mode 100644 index 0000000000..0f70aabb58 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitorThanosSidecar.yaml @@ -0,0 +1,55 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosService.enabled .Values.prometheus.thanosServiceMonitor.enabled }}{{/* ServiceMonitor for the Thanos sidecar; also gated on prometheus.enabled because the thanos-discovery Service it selects is only rendered when Prometheus is enabled. */}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-sidecar + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-sidecar +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.prometheus.thanosServiceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.thanosServiceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-discovery + release: {{ $.Release.Name | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) 
| quote }} + endpoints: + - port: {{ .Values.prometheus.thanosService.httpPortName }} + {{- if .Values.prometheus.thanosServiceMonitor.interval }} + interval: {{ .Values.prometheus.thanosServiceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.scheme }} + scheme: {{ .Values.prometheus.thanosServiceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.tlsConfig }} + tlsConfig: {{ toYaml .Values.prometheus.thanosServiceMonitor.tlsConfig | nindent 6 }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.thanosServiceMonitor.bearerTokenFile }} + {{- end }} + path: "/metrics" + metricRelabelings: + {{- if .Values.prometheus.thanosServiceMonitor.metricRelabelings}}{{/* nindent with a left-trimmed action keeps every rendered line at the same 6-space indent regardless of where this action sits; plain indent here over-indented the first line. */}} + {{- tpl (toYaml .Values.prometheus.thanosServiceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.global.cattle.clusterId }}{{/* Adds a cluster_id label carrying global.cattle.clusterId; quoted so it always renders as a YAML string. */}} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{- if .Values.global.cattle.clusterName}}{{/* Adds a cluster_name label carrying global.cattle.clusterName; quoted for the same reason. */}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} +{{- if .Values.prometheus.thanosServiceMonitor.relabelings }} + relabelings: +{{ toYaml .Values.prometheus.thanosServiceMonitor.relabelings | indent 6 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitors.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitors.yaml new file mode 100644 index 0000000000..a7a301babc --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/servicemonitors.yaml @@ -0,0 +1,47 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.additionalServiceMonitors }} +apiVersion: v1 +kind: List +items: +{{- range .Values.prometheus.additionalServiceMonitors }} + - apiVersion: 
monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + name: {{ .name }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if .additionalLabels }} +{{ toYaml .additionalLabels | indent 8 }} + {{- end }} + spec: + {{- include "servicemonitor.scrapeLimits" . | nindent 6 }} + endpoints: +{{ toYaml .endpoints | indent 8 }} + {{- if .jobLabel }} + jobLabel: {{ .jobLabel }} + {{- end }} + {{- if .namespaceSelector }}{{/* Optional fields of the user-supplied ServiceMonitor entry are copied through only when present; endpoints and selector are always emitted. */}} + namespaceSelector: +{{ toYaml .namespaceSelector | indent 8 }} + {{- end }} + selector: +{{ toYaml .selector | indent 8 }} + {{- if .targetLabels }} + targetLabels: +{{ toYaml .targetLabels | indent 8 }} + {{- end }} + {{- if .podTargetLabels }} + podTargetLabels: +{{ toYaml .podTargetLabels | indent 8 }} + {{- end }} + {{- if .metricRelabelings }} + metricRelabelings: +{{ toYaml .metricRelabelings | indent 8 }} + {{- end }} + {{- if .relabelings }} + relabelings: +{{ toYaml .relabelings | indent 8 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceperreplica.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceperreplica.yaml new file mode 100644 index 0000000000..4bc7f7b869 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/prometheus/serviceperreplica.yaml @@ -0,0 +1,54 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled }}{{/* Emits a List holding one Service per Prometheus replica; $count is prometheusSpec.replicas cast to int and $serviceValues is the servicePerReplica values block. */}} +{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}} +{{- $serviceValues := .Values.prometheus.servicePerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-serviceperreplica + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +items: +{{- range $i, $e := until $count }}{{/* $i runs from 0 to $count-1 and is used both in the Service name and in the statefulset pod-name selector. */}} + - apiVersion: v1 + kind: Service + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $serviceValues.annotations }} + annotations: +{{ toYaml $serviceValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $serviceValues.clusterIP }} + clusterIP: {{ $serviceValues.clusterIP }} + {{- end }} + {{- if $serviceValues.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := $serviceValues.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + {{- if ne $serviceValues.type "ClusterIP" }} + externalTrafficPolicy: {{ $serviceValues.externalTrafficPolicy }} + {{- end }} + ports: + - name: {{ $.Values.prometheus.prometheusSpec.portName }} + {{- if eq $serviceValues.type "NodePort" }} + nodePort: {{ $serviceValues.nodePort }} + {{- end }} + port: {{ $serviceValues.port }} + targetPort: {{ $serviceValues.targetPort }} + selector: + {{- if $.Values.prometheus.agentMode }}{{/* Agent mode selects pods named prom-agent-<crname>-<i>; otherwise prometheus-<crname>-<i>. */}} + app.kubernetes.io/name: prometheus-agent + statefulset.kubernetes.io/pod-name: prom-agent-{{ include "kube-prometheus-stack.prometheus.crname" $ }}-{{ $i }} + {{- else }} + app.kubernetes.io/name: prometheus + statefulset.kubernetes.io/pod-name: prometheus-{{ include "kube-prometheus-stack.prometheus.crname" $ }}-{{ $i }} + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" $ }} + type: "{{ $serviceValues.type }}" +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/clusterrole.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/clusterrole.yaml new file mode 100644 index 0000000000..56ca9f5eae --- /dev/null +++ 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/clusterrole.yaml @@ -0,0 +1,135 @@ +{{- if and .Values.global.rbac.create .Values.global.rbac.userRoles.create }}{{/* Defines the monitoring-admin, monitoring-edit, monitoring-view and monitoring-ui-view ClusterRoles; rendered only when global.rbac.create and userRoles.create are both set. NOTE(review): podmonitors is listed twice in three of the resource lists; the duplicate looks redundant. */}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: monitoring-admin + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} + {{- if .Values.global.rbac.userRoles.aggregateToDefaultRoles }}{{/* Adds the aggregate-to-admin label only when aggregateToDefaultRoles is set. */}} + rbac.authorization.k8s.io/aggregate-to-admin: "true" + {{- end }} +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - prometheuses + - prometheuses/finalizers + - alertmanagers/finalizers + verbs: + - 'get' + - 'list' + - 'watch' +- apiGroups: + - monitoring.coreos.com + resources: + - thanosrulers + - thanosrulers/finalizers + - servicemonitors + - podmonitors + - prometheusrules + - podmonitors + - probes + - probes/finalizers + - alertmanagerconfigs + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: monitoring-edit + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} + {{- if .Values.global.rbac.userRoles.aggregateToDefaultRoles }}{{/* Adds the aggregate-to-edit label only when aggregateToDefaultRoles is set. */}} + rbac.authorization.k8s.io/aggregate-to-edit: "true" + {{- end }} +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - prometheuses + - prometheuses/finalizers + - alertmanagers/finalizers + verbs: + - 'get' + - 'list' + - 'watch' +- apiGroups: + - monitoring.coreos.com + resources: + - thanosrulers + - thanosrulers/finalizers + - servicemonitors + - podmonitors + - prometheusrules + - podmonitors + - probes + - alertmanagerconfigs + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: monitoring-view + labels: {{ include "kube-prometheus-stack.labels" . 
| nindent 4 }} + {{- if .Values.global.rbac.userRoles.aggregateToDefaultRoles }}{{/* Adds the aggregate-to-view label only when aggregateToDefaultRoles is set. */}} + rbac.authorization.k8s.io/aggregate-to-view: "true" + {{- end }} +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - prometheuses + - prometheuses/finalizers + - alertmanagers/finalizers + - thanosrulers + - thanosrulers/finalizers + - servicemonitors + - podmonitors + - prometheusrules + - podmonitors + - probes + - probes/finalizers + - alertmanagerconfigs + verbs: + - 'get' + - 'list' + - 'watch' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: monitoring-ui-view + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - services/proxy + resourceNames: + - "http:{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}" + - "https:{{ template "kube-prometheus-stack.fullname" . }}-prometheus:{{ .Values.prometheus.service.port }}" + - "http:{{ template "kube-prometheus-stack.fullname" . }}-alertmanager:{{ .Values.alertmanager.service.port }}" + - "https:{{ template "kube-prometheus-stack.fullname" . }}-alertmanager:{{ .Values.alertmanager.service.port }}" +{{- if .Values.grafana.enabled }}{{/* Grafana proxy resource names are added only when Grafana is enabled; the name comes from the grafana subchart helper via call-nested. */}} + - "http:{{ include "call-nested" (list . "grafana" "grafana.fullname") }}:{{ .Values.grafana.service.port }}" + - "https:{{ include "call-nested" (list . "grafana" "grafana.fullname") }}:{{ .Values.grafana.service.port }}" +{{- end }} + verbs: + - 'get' + - 'create' +- apiGroups: + - "" + resourceNames: + - {{ template "kube-prometheus-stack.fullname" . }}-prometheus + - {{ template "kube-prometheus-stack.fullname" . }}-alertmanager +{{- if .Values.grafana.enabled }} + - {{ include "call-nested" (list . 
"grafana" "grafana.fullname") }} +{{- end }} + resources: + - endpoints + verbs: + - list +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/config-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/config-role.yaml new file mode 100644 index 0000000000..f48ffc827e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/config-role.yaml @@ -0,0 +1,48 @@ +{{- if and .Values.global.rbac.create .Values.global.rbac.userRoles.create }}{{/* Namespaced Roles over configmaps and secrets in the chart namespace: admin and edit grant every verb, view grants get, list and watch. */}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-config-admin + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-config-edit + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-config-view + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: {{ include "kube-prometheus-stack.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - 'get' + - 'list' + - 'watch' +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboard-role.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboard-role.yaml new file mode 100644 index 0000000000..d2f81976a2 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboard-role.yaml @@ -0,0 +1,47 @@ +{{- if and .Values.global.rbac.create .Values.global.rbac.userRoles.create .Values.grafana.enabled }}{{/* Dashboard Roles require RBAC, userRoles and Grafana to be enabled. */}} +{{- if .Values.grafana.defaultDashboardsEnabled }}{{/* The Roles live in grafana.defaultDashboards.namespace and cover configmaps only: admin and edit grant every verb, view grants get, list and watch. */}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-dashboard-admin + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-dashboard-edit + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: monitoring-dashboard-view + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + labels: {{ include "kube-prometheus-stack.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - 'get' + - 'list' + - 'watch' +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/addons/ingress-nginx-dashboard.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/addons/ingress-nginx-dashboard.yaml new file mode 100644 index 0000000000..7b51a0bf7a --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/addons/ingress-nginx-dashboard.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.ingressNginx.enabled }}{{/* ConfigMap carrying every file under files/ingress-nginx; rendered only when Grafana, default dashboards and ingressNginx are all enabled. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "ingress-nginx" | trunc 63 | trimSuffix "-" }} + {{- if .Values.grafana.sidecar.dashboards.annotations }} + annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | nindent 4 }} + {{- end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }}{{/* The configured sidecar label key is set to "1" on the ConfigMap. */}} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/ingress-nginx/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/cluster-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/cluster-dashboards.yaml new file mode 100644 index 0000000000..d73b257451 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/cluster-dashboards.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}{{/* ConfigMap rancher-default-dashboards-cluster; rendered only when Grafana and default dashboards are enabled. */}} +apiVersion: v1 
+kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-cluster + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }}{{/* The configured sidecar label key is set to "1" on the ConfigMap. */}} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/cluster/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/default-dashboard.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/default-dashboard.yaml new file mode 100644 index 0000000000..8865efa932 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/default-dashboard.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}{{/* ConfigMap rancher-default-dashboards-home carrying every file under files/rancher/home; rendered only when Grafana and default dashboards are enabled. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-home + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/home/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/fleet-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/fleet-dashboards.yaml new file mode 100644 index 0000000000..9b05cea2e8 --- /dev/null +++ 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/fleet-dashboards.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}{{/* ConfigMap rancher-fleet-dashboards carrying every file under files/rancher/fleet; rendered only when Grafana and default dashboards are enabled. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-fleet-dashboards + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/fleet/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/k8s-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/k8s-dashboards.yaml new file mode 100644 index 0000000000..2afae10ef7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/k8s-dashboards.yaml @@ -0,0 +1,31 @@ +{{- $files := (.Files.Glob "files/rancher/k8s/*").AsConfig }}{{/* The bundled k8s dashboards are loaded as YAML text and parsed into a dict so single files can be removed or patched before rendering. */}} +{{- $filesDict := (fromYaml $files) }} +{{- if not (include "exporter.kubeEtcd.enabled" .) }}{{/* Both etcd dashboards are dropped when the kubeEtcd exporter helper renders empty. */}} +{{- $filesDict = (unset $filesDict "rancher-etcd-nodes.json") -}} +{{- $filesDict = (unset $filesDict "rancher-etcd.json") -}} +{{- end }} +{{- if not (include "exporter.kubeControllerManager.enabled" .) 
}} +{{- $filesDict = (unset $filesDict "rancher-k8s-components-nodes.json") -}} +{{- $filesDict = (unset $filesDict "rancher-k8s-components.json") -}} +{{- else }}{{/* When the controller-manager exporter is enabled, the literal kube-controller-manager in both component dashboards is replaced with the configured exporter job name. */}} +{{- $_ := (set $filesDict "rancher-k8s-components-nodes.json" (get $filesDict "rancher-k8s-components-nodes.json" | replace "kube-controller-manager" (include "exporter.kubeControllerManager.jobName" .))) -}} +{{- $_ := (set $filesDict "rancher-k8s-components.json" (get $filesDict "rancher-k8s-components.json" | replace "kube-controller-manager" (include "exporter.kubeControllerManager.jobName" .))) -}} +{{- end }} +{{ $files = (toYaml $filesDict) }} +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}{{/* The filtered dashboards are emitted as ConfigMap rancher-default-dashboards-k8s only when Grafana and default dashboards are enabled. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-k8s + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ $files | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/nodes-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/nodes-dashboards.yaml new file mode 100644 index 0000000000..172c36e9d1 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/nodes-dashboards.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }}{{/* ConfigMap rancher-default-dashboards-nodes; rendered only when Grafana and default dashboards are enabled. */}} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-nodes + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} 
+ labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/nodes/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/performance-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/performance-dashboards.yaml new file mode 100644 index 0000000000..19836ec4e4 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/performance-dashboards.yaml @@ -0,0 +1,18 @@ +{{- $selector := (include "rancher.serviceMonitor.selector" .) -}} +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.rancherMonitoring.enabled $selector }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-performance-debugging + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/performance/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/pods-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/pods-dashboards.yaml new file mode 100644 index 0000000000..940f18869b --- /dev/null +++ 
b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/pods-dashboards.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-pods + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/pods/*").AsConfig | indent 2 }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/workload-dashboards.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/workload-dashboards.yaml new file mode 100644 index 0000000000..d146dacdd0 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/dashboards/rancher/workload-dashboards.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.grafana.defaultDashboards.namespace }} + name: rancher-default-dashboards-workloads + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: +{{ (.Files.Glob "files/rancher/workloads/*").AsConfig | indent 2 }} +{{- end }} diff --git 
a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/fleet/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/fleet/servicemonitor.yaml new file mode 100644 index 0000000000..90d24c2061 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/fleet/servicemonitor.yaml @@ -0,0 +1,53 @@ +{{- if .Values.rancherMonitoring.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: {{ include "kube-prometheus-stack.labels" . | nindent 4 }} + name: monitoring-fleet-controller + namespace: cattle-fleet-system +spec: + endpoints: + - port: metrics + metricRelabelings: + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} + jobLabel: fleet + selector: + matchLabels: + app: fleet-controller +{{- end }} +--- +{{- if .Values.rancherMonitoring.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: {{ include "kube-prometheus-stack.labels" . 
| nindent 4 }} + name: monitoring-gitops-controller + namespace: cattle-fleet-system +spec: + endpoints: + - port: metrics + metricRelabelings: + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} + jobLabel: gitops + selector: + matchLabels: + app: gitjob +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/service.yaml new file mode 100644 index 0000000000..53a9ad6897 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/service.yaml @@ -0,0 +1,27 @@ +{{- if and (not .Values.ingressNginx.enabled) (.Values.rkeIngressNginx.enabled) }} +{{- fail "Cannot set .Values.rkeIngressNginx.enabled=true when .Values.ingressNginx.enabled=false" }} +{{- end }} +{{- if and .Values.ingressNginx.enabled (not .Values.rkeIngressNginx.enabled) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-ingress-nginx + labels: + app: {{ template "kube-prometheus-stack.name" . }}-ingress-nginx + jobLabel: ingress-nginx +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} + namespace: {{ .Values.ingressNginx.namespace }} +spec: + clusterIP: None + ports: + - name: http-metrics + port: {{ .Values.ingressNginx.service.port }} + protocol: TCP + targetPort: {{ .Values.ingressNginx.service.targetPort }} + selector: + {{- if .Values.ingressNginx.service.selector }} +{{ toYaml .Values.ingressNginx.service.selector | indent 4 }} + {{- else }} + app: ingress-nginx + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml new file mode 100644 index 0000000000..b0f92e63b5 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml @@ -0,0 +1,49 @@ +{{- if and (not .Values.ingressNginx.enabled) (.Values.rkeIngressNginx.enabled) }} +{{- fail "Cannot set .Values.rkeIngressNginx.enabled=true when .Values.ingressNginx.enabled=false" }} +{{- end }} +{{- if and .Values.ingressNginx.enabled (not .Values.rkeIngressNginx.enabled) }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-ingress-nginx + namespace: {{ .Values.ingressNginx.namespace }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-ingress-nginx +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: jobLabel + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . 
}}-ingress-nginx + release: {{ $.Release.Name | quote }} + namespaceSelector: + matchNames: + - {{ .Values.ingressNginx.namespace }} + endpoints: + - port: http-metrics + {{- if .Values.ingressNginx.serviceMonitor.interval}} + interval: {{ .Values.ingressNginx.serviceMonitor.interval }} + {{- end }} + {{- if .Values.ingressNginx.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.ingressNginx.serviceMonitor.proxyUrl}} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + metricRelabelings: + {{- if .Values.ingressNginx.serviceMonitor.metricRelabelings }} + {{ tpl (toYaml .Values.ingressNginx.serviceMonitor.metricRelabelings | indent 4) . }} + {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} +{{- if .Values.ingressNginx.serviceMonitor.relabelings }} + relabelings: +{{ toYaml .Values.ingressNginx.serviceMonitor.relabelings | indent 4 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/rancher/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/rancher/servicemonitor.yaml new file mode 100644 index 0000000000..1fba8f23f7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/exporters/rancher/servicemonitor.yaml @@ -0,0 +1,58 @@ +{{- $selector := (include "rancher.serviceMonitor.selector" .) -}} +{{- if and .Values.rancherMonitoring.enabled $selector }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: {{ include "kube-prometheus-stack.labels" . 
| nindent 4 }} + name: rancher + namespace: cattle-system +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + port: http + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true + serverName: rancher + metricRelabelings: + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} + jobLabel: rancher +{{- if .Values.rancherMonitoring.namespaceSelector }} + namespaceSelector: {{ .Values.rancherMonitoring.namespaceSelector | toYaml | nindent 4 }} +{{- end }} + selector: {{ include "rancher.serviceMonitor.selector" . | nindent 4 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-rancher-metrics +rules: +- apiGroups: + - management.cattle.io + resources: + - ranchermetrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-rancher-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-rancher-metrics +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/hardened.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/hardened.yaml new file mode 100644 index 0000000000..f9a66151ee --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/hardened.yaml @@ -0,0 +1,147 @@ +{{- $namespaces := dict "_0" .Release.Namespace -}} +{{- if and .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled (not .Values.grafana.defaultDashboards.useExistingNamespace) -}} +{{- $_ := set $namespaces "_1" .Values.grafana.defaultDashboards.namespace -}} +{{- end -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Chart.Name }}-patch-sa + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }}-patch-sa + annotations: + "helm.sh/hook": post-install, post-upgrade + "helm.sh/hook-delete-policy": hook-succeeded, before-hook-creation +spec: + template: + metadata: + name: {{ .Chart.Name }}-patch-sa + labels: + app: {{ .Chart.Name }}-patch-sa + spec: + serviceAccountName: {{ .Chart.Name }}-patch-sa + securityContext: + runAsNonRoot: true + runAsUser: 1000 + restartPolicy: Never + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} + tolerations: {{ include "linux-node-tolerations" . 
| nindent 8 }} + containers: + {{- range $_, $ns := $namespaces }} + - name: patch-sa-{{ $ns }} + image: {{ template "system_default_registry" $ }}{{ $.Values.global.kubectl.repository }}:{{ $.Values.global.kubectl.tag }} + imagePullPolicy: {{ $.Values.global.kubectl.pullPolicy }} + command: ["kubectl", "patch", "serviceaccount", "default", "-p", "{\"automountServiceAccountToken\": false}"] + args: ["-n", "{{ $ns }}"] + {{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ .Chart.Name }}-patch-sa + labels: + app: {{ .Chart.Name }}-patch-sa +rules: +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: ['get', 'patch'] +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ .Chart.Name }}-patch-sa +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ .Chart.Name }}-patch-sa + labels: + app: {{ .Chart.Name }}-patch-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Chart.Name }}-patch-sa +subjects: +- kind: ServiceAccount + name: {{ .Chart.Name }}-patch-sa + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Chart.Name }}-patch-sa + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }}-patch-sa +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ .Chart.Name }}-patch-sa + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Chart.Name }}-patch-sa +spec: + privileged: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'secret' +{{- end }}{{/* closes the PSP block only, so the NetworkPolicies below render regardless of psp.enabled */}} +{{- range 
$_, $ns := $namespaces }} +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-allow-all + namespace: {{ $ns }} +spec: + podSelector: {} + ingress: + - {} + egress: + - {} + policyTypes: + - Ingress + - Egress +{{- end }} +--- +{{- if .Values.hardened.k3s.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: rancher-monitoring-coredns-allow-all + namespace: kube-system +spec: + ingress: + - {} + egress: + - {} + policyTypes: + - Ingress + - Egress + podSelector: + matchLabels: + k8s-app: kube-dns +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/configmap.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/configmap.yaml new file mode 100644 index 0000000000..53cb898214 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.upgrade.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "0" +data: +{{ (.Files.Glob "files/upgrade/scripts/*").AsConfig | indent 2 }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/job.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/job.yaml new file mode 100644 index 0000000000..8f2771740c --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/job.yaml @@ -0,0 +1,46 @@ +{{- if .Values.upgrade.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "2" +spec: + template: + metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + spec: + serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + securityContext: + runAsNonRoot: false + runAsUser: 0 + restartPolicy: Never + nodeSelector: {{ include "linux-node-selector" . | nindent 8 }} + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} + containers: + - name: run-scripts + image: {{ template "system_default_registry" . 
}}{{ .Values.upgrade.image.repository }}:{{ .Values.upgrade.image.tag }} + imagePullPolicy: {{ $.Values.global.kubectl.pullPolicy }} + command: + - /bin/sh + - -c + - > + for s in $(find /etc/scripts -type f); do + echo "Running $s..."; + cat $s | bash + done; + volumeMounts: + - name: upgrade + mountPath: /etc/scripts + volumes: + - name: upgrade + configMap: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/rbac.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/rbac.yaml new file mode 100644 index 0000000000..e929a19925 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/rancher-monitoring/upgrade/rbac.yaml @@ -0,0 +1,131 @@ +{{- if .Values.upgrade.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded + "helm.sh/hook-weight": "1" +rules: +- apiGroups: + - apps + resources: + - deployments + - daemonsets + - statefulsets + verbs: + - 'list' + - 'delete' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.fullname" . 
}}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "1" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade +subjects: +- kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "1" +rules: +{{- if .Values.global.cattle.psp.enabled }} +- apiGroups: ['policy'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-prometheus-stack.fullname" . }}-upgrade +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "1" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade +subjects: +- kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "1" +--- +{{- if .Values.global.cattle.psp.enabled }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.fullname" . }}-upgrade + annotations: + "helm.sh/hook": pre-upgrade, pre-rollback + "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed + "helm.sh/hook-weight": "1" +spec: + privileged: false + allowPrivilegeEscalation: false + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: false + volumes: + - 'configMap' + - 'secret' +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/extrasecret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/extrasecret.yaml new file mode 100644 index 0000000000..587fca2dca --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.thanosRuler.extraSecret.data -}} +{{- $secretName := printf "%s-extra" (include "kube-prometheus-stack.thanosRuler.name" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.thanosRuler.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} +{{- if .Values.thanosRuler.extraSecret.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/component: thanos-ruler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- range $key, $val := .Values.thanosRuler.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ingress.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ingress.yaml new file mode 100644 index 0000000000..e245ad448e --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ingress.yaml @@ -0,0 +1,77 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.ingress.enabled }} +{{- $pathType := .Values.thanosRuler.ingress.pathType | default "ImplementationSpecific" }} +{{- $serviceName := include "kube-prometheus-stack.thanosRuler.name" . }} +{{- $servicePort := .Values.thanosRuler.service.port -}} +{{- $routePrefix := list .Values.thanosRuler.thanosRulerSpec.routePrefix }} +{{- $paths := .Values.thanosRuler.ingress.paths | default $routePrefix -}} +{{- $apiIsStable := eq (include "kube-prometheus-stack.ingress.isStable" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "kube-prometheus-stack.ingress.supportsPathType" .) "true" -}} +apiVersion: {{ include "kube-prometheus-stack.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.thanosRuler.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.thanosRuler.ingress.annotations) . | nindent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . 
}} +{{- if .Values.thanosRuler.ingress.labels }} +{{ toYaml .Values.thanosRuler.ingress.labels | indent 4 }} +{{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if $apiIsStable }} + {{- if .Values.thanosRuler.ingress.ingressClassName }} + ingressClassName: {{ .Values.thanosRuler.ingress.ingressClassName }} + {{- end }} + {{- end }} + rules: + {{- if .Values.thanosRuler.ingress.hosts }} + {{- range $host := .Values.thanosRuler.ingress.hosts }} + - host: {{ tpl $host $ }} + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + {{- if and $pathType $ingressSupportsPathType }} + pathType: {{ $pathType }} + {{- end }} + backend: + {{- if $apiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- end -}} + {{- if .Values.thanosRuler.ingress.tls }} + tls: +{{ tpl (toYaml .Values.thanosRuler.ingress.tls | indent 4) . 
}} + {{- end -}} +{{- end -}} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/podDisruptionBudget.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/podDisruptionBudget.yaml new file mode 100644 index 0000000000..83e54edf9b --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/podDisruptionBudget.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.podDisruptionBudget.enabled }} +apiVersion: {{ include "kube-prometheus-stack.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if .Values.thanosRuler.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.thanosRuler.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.thanosRuler.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.thanosRuler.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + app.kubernetes.io/name: thanos-ruler + thanos-ruler: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ruler.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ruler.yaml new file mode 100644 index 0000000000..b281221563 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/ruler.yaml @@ -0,0 +1,189 @@ +{{- if .Values.thanosRuler.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ThanosRuler +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + labels: + app: {{ include "kube-prometheus-stack.thanosRuler.name" . }} +{{- include "kube-prometheus-stack.labels" . | indent 4 -}} +{{- if .Values.thanosRuler.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.thanosRuler.thanosRulerSpec.image }} + {{- $registry := include "monitoring_registry" . | default .Values.thanosRuler.thanosRulerSpec.image.registry -}} + {{- if and .Values.thanosRuler.thanosRulerSpec.image.tag .Values.thanosRuler.thanosRulerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}:{{ .Values.thanosRuler.thanosRulerSpec.image.tag }}@sha256:{{ .Values.thanosRuler.thanosRulerSpec.image.sha }}" + {{- else if .Values.thanosRuler.thanosRulerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}@sha256:{{ .Values.thanosRuler.thanosRulerSpec.image.sha }}" + {{- else if .Values.thanosRuler.thanosRulerSpec.image.tag }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}:{{ .Values.thanosRuler.thanosRulerSpec.image.tag }}" + {{- else }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}" + {{- end }} + {{- if .Values.thanosRuler.thanosRulerSpec.image.sha }} + sha: {{ .Values.thanosRuler.thanosRulerSpec.image.sha }} + {{- end }} +{{- end }} + replicas: {{ .Values.thanosRuler.thanosRulerSpec.replicas }} + listenLocal: {{ .Values.thanosRuler.thanosRulerSpec.listenLocal }} + serviceAccountName: {{ template "kube-prometheus-stack.thanosRuler.serviceAccountName" . }} +{{- if .Values.thanosRuler.thanosRulerSpec.externalPrefix }} + externalPrefix: "{{ tpl .Values.thanosRuler.thanosRulerSpec.externalPrefix . }}" +{{- else if and .Values.thanosRuler.ingress.enabled .Values.thanosRuler.ingress.hosts }} + externalPrefix: "http://{{ tpl (index .Values.thanosRuler.ingress.hosts 0) . 
}}{{ .Values.thanosRuler.thanosRulerSpec.routePrefix }}" +{{- else }} + externalPrefix: http://{{ template "kube-prometheus-stack.thanosRuler.name" . }}.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.thanosRuler.service.port }} +{{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 4 }} +{{- if .Values.thanosRuler.thanosRulerSpec.nodeSelector }} +{{ toYaml .Values.thanosRuler.thanosRulerSpec.nodeSelector | indent 4 }} +{{- end }}{{/* user nodeSelector entries extend the nodeSelector map above, so they must precede additionalArgs */}} +{{- if .Values.thanosRuler.thanosRulerSpec.additionalArgs }} + additionalArgs: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.additionalArgs | indent 4 }} +{{- end }} + paused: {{ .Values.thanosRuler.thanosRulerSpec.paused }} + logFormat: {{ .Values.thanosRuler.thanosRulerSpec.logFormat | quote }} + logLevel: {{ .Values.thanosRuler.thanosRulerSpec.logLevel | quote }} + retention: {{ .Values.thanosRuler.thanosRulerSpec.retention | quote }} +{{- if .Values.thanosRuler.thanosRulerSpec.evaluationInterval }} + evaluationInterval: {{ .Values.thanosRuler.thanosRulerSpec.evaluationInterval }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.ruleNamespaceSelector }} + ruleNamespaceSelector: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.ruleNamespaceSelector | indent 4) . 
}} +{{ else }} + ruleNamespaceSelector: {} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.ruleSelector }} + ruleSelector: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.ruleSelector | indent 4) .}} +{{- else if .Values.thanosRuler.thanosRulerSpec.ruleSelectorNilUsesHelmValues }} + ruleSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + ruleSelector: {} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.alertQueryUrl }} + alertQueryUrl: "{{ .Values.thanosRuler.thanosRulerSpec.alertQueryUrl }}" +{{- end}} +{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersUrl }} + alertmanagersUrl: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.alertmanagersUrl | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret }} + alertmanagersConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.secret }} + alertmanagersConfig: + key: alertmanager-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.queryEndpoints }} + queryEndpoints: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.queryEndpoints | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret }} + queryConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.queryConfig.secret }} + queryConfig: + key: query-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . 
}} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.resources }} + resources: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.resources | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.routePrefix }} + routePrefix: "{{ .Values.thanosRuler.thanosRulerSpec.routePrefix }}" +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.securityContext }} + securityContext: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.securityContext | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.storage }} + storage: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.storage | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret }} + objectStorageConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.secret }} + objectStorageConfig: + key: object-storage-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . 
}} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.labels }} + labels: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.labels | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.podMetadata }} + podMetadata: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.podMetadata | indent 4 }} +{{- end }} +{{- if or .Values.thanosRuler.thanosRulerSpec.podAntiAffinity .Values.thanosRuler.thanosRulerSpec.affinity }} + affinity: +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.affinity }} +{{ toYaml .Values.thanosRuler.thanosRulerSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.thanosRuler.thanosRulerSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.thanosRuler.thanosRulerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [thanos-ruler]} + - {key: thanos-ruler, operator: In, values: [{{ template "kube-prometheus-stack.thanosRuler.name" . }}]} +{{- else if eq .Values.thanosRuler.thanosRulerSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.thanosRuler.thanosRulerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [thanos-ruler]} + - {key: thanos-ruler, operator: In, values: [{{ template "kube-prometheus-stack.thanosRuler.name" . }}]} +{{- end }} + tolerations: {{ include "linux-node-tolerations" . 
| nindent 4 }} +{{- if .Values.thanosRuler.thanosRulerSpec.tolerations }} +{{ toYaml .Values.thanosRuler.thanosRulerSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.global.imagePullSecrets | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.containers }} + containers: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.containers | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.initContainers }} + initContainers: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.priorityClassName }} + priorityClassName: {{.Values.thanosRuler.thanosRulerSpec.priorityClassName }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.volumes }} + volumes: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.volumeMounts | indent 4 }} +{{- end }} + portName: {{ .Values.thanosRuler.thanosRulerSpec.portName }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/secret.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/secret.yaml new file mode 100644 index 0000000000..acab7fd9ae --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/secret.yaml @@ -0,0 +1,26 @@ +{{- if .Values.thanosRuler.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ include "kube-prometheus-stack.thanosRuler.name" . 
}} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + {{- with .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig }} + {{- if and .secret (not .existingSecret) }} + alertmanager-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} + {{- with .Values.thanosRuler.thanosRulerSpec.objectStorageConfig }} + {{- if and .secret (not .existingSecret) }} + object-storage-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} + {{- with .Values.thanosRuler.thanosRulerSpec.queryConfig }} + {{- if and .secret (not .existingSecret) }} + query-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/service.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/service.yaml new file mode 100644 index 0000000000..be0c844591 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/service.yaml @@ -0,0 +1,53 @@ +{{- if .Values.thanosRuler.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + self-monitor: {{ .Values.thanosRuler.serviceMonitor.selfMonitor | quote }} +{{- include "kube-prometheus-stack.labels" . 
| indent 4 -}} +{{- if .Values.thanosRuler.service.labels }} +{{ toYaml .Values.thanosRuler.service.labels | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.service.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.thanosRuler.service.clusterIP }} + clusterIP: {{ .Values.thanosRuler.service.clusterIP }} +{{- end }} +{{- if .Values.thanosRuler.service.externalIPs }} + externalIPs: +{{ toYaml .Values.thanosRuler.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.thanosRuler.service.loadBalancerIP }} +{{- end }} +{{- if .Values.thanosRuler.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.thanosRuler.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.thanosRuler.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.thanosRuler.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.thanosRuler.thanosRulerSpec.portName }} + {{- if eq .Values.thanosRuler.service.type "NodePort" }} + nodePort: {{ .Values.thanosRuler.service.nodePort }} + {{- end }} + port: {{ .Values.thanosRuler.service.port }} + targetPort: {{ .Values.thanosRuler.service.targetPort }} + protocol: TCP +{{- if .Values.thanosRuler.service.additionalPorts }} +{{ toYaml .Values.thanosRuler.service.additionalPorts | indent 2 }} +{{- end }} + selector: + app.kubernetes.io/name: thanos-ruler + thanos-ruler: {{ template "kube-prometheus-stack.thanosRuler.name" . 
}} + type: "{{ .Values.thanosRuler.service.type }}" +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/serviceaccount.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/serviceaccount.yaml new file mode 100644 index 0000000000..b58f1cd4df --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/serviceaccount.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/component: thanos-ruler +{{- include "kube-prometheus-stack.labels" . | indent 4 -}} +{{- if .Values.thanosRuler.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.serviceAccount.annotations | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ toYaml .Values.global.imagePullSecrets | indent 2 }} +{{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/servicemonitor.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/servicemonitor.yaml new file mode 100644 index 0000000000..b2b138b498 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/thanos-ruler/servicemonitor.yaml @@ -0,0 +1,82 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.thanosRuler.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.thanosRuler.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + release: {{ $.Release.Name | quote }} + self-monitor: {{ .Values.thanosRuler.serviceMonitor.selfMonitor | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.thanosRuler.thanosRulerSpec.portName }} + {{- if .Values.thanosRuler.serviceMonitor.interval }} + interval: {{ .Values.thanosRuler.serviceMonitor.interval }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.thanosRuler.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.scheme }} + scheme: {{ .Values.thanosRuler.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.thanosRuler.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.thanosRuler.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "{{ trimSuffix "/" .Values.thanosRuler.thanosRulerSpec.routePrefix }}/metrics" + {{- /* Emit the metricRelabelings key whenever user rules OR the Rancher cluster_id / cluster_name rules below will be rendered, so those list items always have a parent key; the user list itself is only rendered when non-empty. */ -}}{{- if or .Values.thanosRuler.serviceMonitor.metricRelabelings .Values.global.cattle.clusterId .Values.global.cattle.clusterName }} + metricRelabelings: {{- with .Values.thanosRuler.serviceMonitor.metricRelabelings }}{{ tpl (toYaml . | nindent 6) $ }}{{- end }}
+ {{- end }} + {{ if .Values.global.cattle.clusterId }} + - sourceLabels: [__address__] + targetLabel: cluster_id + replacement: {{ .Values.global.cattle.clusterId | quote }} + {{- end }} + {{ if .Values.global.cattle.clusterName}} + - sourceLabels: [__address__] + targetLabel: cluster_name + replacement: {{ .Values.global.cattle.clusterName | quote }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.thanosRuler.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.thanosRuler.serviceMonitor.additionalEndpoints }} + - port: {{ .port }} + {{- if or $.Values.thanosRuler.serviceMonitor.interval .interval }} + interval: {{ default $.Values.thanosRuler.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.thanosRuler.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- /* Inside this range "." is the endpoint entry, so tpl is handed the root context "$" to resolve .Values / .Release references in the rules. */ -}}{{- if or $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings }} + metricRelabelings: {{- tpl (default $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) $ 
}} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.thanosRuler.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-install-crd.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-install-crd.yaml new file mode 100644 index 0000000000..6fcb8b3a69 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-install-crd.yaml @@ -0,0 +1,23 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +# {{- $found := dict -}} +# {{- set $found "monitoring.coreos.com/v1alpha1/AlertmanagerConfig" false -}} +# {{- set $found "monitoring.coreos.com/v1/Alertmanager" false -}} +# {{- set $found "monitoring.coreos.com/v1/PodMonitor" false -}} +# {{- set $found "monitoring.coreos.com/v1/Probe" false -}} +# {{- set $found "monitoring.coreos.com/v1alpha1/PrometheusAgent" false -}} +# {{- set $found "monitoring.coreos.com/v1/Prometheus" false -}} +# {{- set $found "monitoring.coreos.com/v1/PrometheusRule" false -}} +# {{- set $found "monitoring.coreos.com/v1alpha1/ScrapeConfig" false -}} +# {{- set $found "monitoring.coreos.com/v1/ServiceMonitor" false -}} +# {{- set $found "monitoring.coreos.com/v1/ThanosRuler" false -}} +# {{- range .Capabilities.APIVersions -}} +# {{- if hasKey $found (toString .) -}} +# {{- set $found (toString .) true -}} +# {{- end -}} +# {{- end -}} +# {{- range $_, $exists := $found -}} +# {{- if (eq $exists false) -}} +# {{- required "Required CRDs are missing. Please install the corresponding CRD chart before installing this chart." 
"" -}} +# {{- end -}} +# {{- end -}} +#{{- end -}} \ No newline at end of file diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml new file mode 100644 index 0000000000..a30c59d3b7 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/templates/validate-psp-install.yaml @@ -0,0 +1,7 @@ +#{{- if gt (len (lookup "rbac.authorization.k8s.io/v1" "ClusterRole" "" "")) 0 -}} +#{{- if .Values.global.cattle.psp.enabled }} +#{{- if not (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +#{{- fail "The target cluster does not have the PodSecurityPolicy API resource. Please disable PSPs in this chart before proceeding." -}} +#{{- end }} +#{{- end }} +#{{- end }} diff --git a/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/values.yaml b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/values.yaml new file mode 100644 index 0000000000..0bf19a8572 --- /dev/null +++ b/charts/rancher-monitoring/104.1.2-rc.1+up57.0.3/values.yaml @@ -0,0 +1,5431 @@ +# Default values for kube-prometheus-stack. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# Rancher Monitoring Configuration + +## Configuration for prometheus-adapter +## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter +## +prometheus-adapter: + enabled: true + prometheus: + # Change this if you change the namespaceOverride or nameOverride of prometheus-operator + url: http://rancher-monitoring-prometheus.cattle-monitoring-system.svc + port: 9090 + +## RKE PushProx Monitoring +## ref: https://github.com/rancher/charts/tree/dev-v2.9/packages/rancher-monitoring/rancher-pushprox +## +rkeControllerManager: + enabled: false + metricsPort: 10257 # default to secure port as of k8s >= 1.22 + component: kube-controller-manager + clients: + https: + enabled: true + insecureSkipVerify: true + useServiceAccountCredentials: true + port: 10011 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/controlplane: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + kubeVersionOverrides: + - constraint: "< 1.22" + values: + metricsPort: 10252 # default to insecure port in k8s < 1.22 + clients: + https: + enabled: false + insecureSkipVerify: false + useServiceAccountCredentials: false + +rkeScheduler: + enabled: false + metricsPort: 10259 + component: kube-scheduler + clients: + https: + enabled: true + insecureSkipVerify: true + useServiceAccountCredentials: true + port: 10012 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/controlplane: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + kubeVersionOverrides: + - constraint: "< 1.23" + values: + metricsPort: 10251 # default to insecure port in k8s < 1.23 + clients: + https: + enabled: false + insecureSkipVerify: false + useServiceAccountCredentials: false + +rkeProxy: + enabled: false + metricsPort: 10249 + component: kube-proxy + clients: + port: 10013 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: 
"Exists" + - effect: "NoSchedule" + operator: "Exists" + +rkeEtcd: + enabled: false + metricsPort: 2379 + component: kube-etcd + clients: + port: 10014 + https: + enabled: true + certDir: /etc/kubernetes/ssl + certFile: kube-etcd-*.pem + keyFile: kube-etcd-*-key.pem + caCertFile: kube-ca.pem + seLinuxOptions: + # Gives rkeEtcd permissions to read files in /etc/kubernetes/* + # Type is defined in https://github.com/rancher/rancher-selinux + type: rke_kubereader_t + nodeSelector: + node-role.kubernetes.io/etcd: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +rkeIngressNginx: + enabled: false + metricsPort: 10254 + component: ingress-nginx + clients: + port: 10015 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + nodeSelector: + node-role.kubernetes.io/worker: "true" + +## k3s PushProx Monitoring +## ref: https://github.com/rancher/charts/tree/dev-v2.9/packages/rancher-monitoring/rancher-pushprox +## +k3sServer: + enabled: false + metricsPort: 10250 + component: k3s-server + clients: + port: 10013 + useLocalhost: true + https: + enabled: true + useServiceAccountCredentials: true + insecureSkipVerify: true + rbac: + additionalRules: + - nonResourceURLs: ["/metrics/cadvisor"] + verbs: ["get"] + - apiGroups: [""] + resources: ["nodes/metrics"] + verbs: ["get"] + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + serviceMonitor: + endpoints: + - port: metrics + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - port: metrics + path: /metrics/cadvisor + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - port: metrics + path: /metrics/probes + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + +hardened: + k3s: + networkPolicy: 
+ enabled: true + +## KubeADM PushProx Monitoring +## ref: https://github.com/rancher/charts/tree/dev-v2.9/packages/rancher-monitoring/rancher-pushprox +## +kubeAdmControllerManager: + enabled: false + metricsPort: 10257 + component: kube-controller-manager + clients: + port: 10011 + useLocalhost: true + https: + enabled: true + useServiceAccountCredentials: true + insecureSkipVerify: true + nodeSelector: + node-role.kubernetes.io/master: "" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +kubeAdmScheduler: + enabled: false + metricsPort: 10259 + component: kube-scheduler + clients: + port: 10012 + useLocalhost: true + https: + enabled: true + useServiceAccountCredentials: true + insecureSkipVerify: true + nodeSelector: + node-role.kubernetes.io/master: "" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +kubeAdmProxy: + enabled: false + metricsPort: 10249 + component: kube-proxy + clients: + port: 10013 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +kubeAdmEtcd: + enabled: false + metricsPort: 2381 + component: kube-etcd + clients: + port: 10014 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/master: "" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +## rke2 PushProx Monitoring +## ref: https://github.com/rancher/charts/tree/dev-v2.9/packages/rancher-monitoring/rancher-pushprox +## +rke2ControllerManager: + enabled: false + metricsPort: 10257 # default to secure port as of k8s >= 1.22 + component: kube-controller-manager + clients: + https: + enabled: true + insecureSkipVerify: true + useServiceAccountCredentials: true + port: 10011 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/master: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: 
"NoSchedule" + operator: "Exists" + kubeVersionOverrides: + - constraint: "< 1.22" + values: + metricsPort: 10252 # default to insecure port in k8s < 1.22 + clients: + https: + enabled: false + insecureSkipVerify: false + useServiceAccountCredentials: false + +rke2Scheduler: + enabled: false + metricsPort: 10259 # default to secure port as of k8s >= 1.22 + component: kube-scheduler + clients: + https: + enabled: true + insecureSkipVerify: true + useServiceAccountCredentials: true + port: 10012 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/master: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + kubeVersionOverrides: + - constraint: "< 1.22" + values: + metricsPort: 10251 # default to insecure port in k8s < 1.22 + clients: + https: + enabled: false + insecureSkipVerify: false + useServiceAccountCredentials: false + +rke2Proxy: + enabled: false + metricsPort: 10249 + component: kube-proxy + clients: + port: 10013 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +rke2Etcd: + enabled: false + metricsPort: 2381 + component: kube-etcd + clients: + port: 10014 + useLocalhost: true + nodeSelector: + node-role.kubernetes.io/etcd: "true" + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +rke2IngressNginx: + enabled: false + metricsPort: 10254 + component: ingress-nginx + # in the RKE2 cluster, the ingress-nginx-controller is deployed + # as a non-hostNetwork workload starting at the following versions + # - >= v1.22.12+rke2r1 < 1.23.0-0 + # - >= v1.23.9+rke2r1 < 1.24.0-0 + # - >= v1.24.3+rke2r1 < 1.25.0-0 + # - >= v1.25.0+rke2r1 + # As a result we do not need clients and proxies as we can directly create + # a service that targets the workload with the given app name + namespaceOverride: kube-system + clients: + enabled: false + proxy: + enabled: false + 
service: + selector: + app.kubernetes.io/name: rke2-ingress-nginx + kubeVersionOverrides: + - constraint: "< 1.21.0-0" + values: + namespaceOverride: "" + clients: + enabled: true + port: 10015 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app.kubernetes.io/component" + operator: "In" + values: + - "controller" + topologyKey: "kubernetes.io/hostname" + namespaces: + - "kube-system" + # in the RKE2 cluster, the ingress-nginx-controller is deployed as + # a DaemonSet with 1 pod when RKE2 version is < 1.21.0-0 + deployment: + enabled: false + proxy: + enabled: true + service: + selector: false + - constraint: ">= 1.21.0-0 < 1.22.12-0" + values: + namespaceOverride: "" + clients: + enabled: true + port: 10015 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app.kubernetes.io/component" + operator: "In" + values: + - "controller" + topologyKey: "kubernetes.io/hostname" + namespaces: + - "kube-system" + # in the RKE2 cluster, the ingress-nginx-controller is deployed as + # a hostNetwork Deployment with 1 pod when RKE2 version is >= 1.21.0-0 + deployment: + enabled: true + replicas: 1 + proxy: + enabled: true + service: + selector: false + - constraint: ">= 1.23.0-0 < v1.23.9-0" + values: + namespaceOverride: "" + clients: + enabled: true + port: 10015 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app.kubernetes.io/component" + operator: "In" + values: + - 
"controller" + topologyKey: "kubernetes.io/hostname" + namespaces: + - "kube-system" + # in the RKE2 cluster, the ingress-nginx-controller is deployed as + # a hostNetwork Deployment with 1 pod when RKE2 version is >= 1.21.0-0 + deployment: + enabled: true + replicas: 1 + proxy: + enabled: true + service: + selector: false + - constraint: ">= 1.24.0-0 < v1.24.3-0" + values: + namespaceOverride: "" + clients: + enabled: true + port: 10015 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app.kubernetes.io/component" + operator: "In" + values: + - "controller" + topologyKey: "kubernetes.io/hostname" + namespaces: + - "kube-system" + # in the RKE2 cluster, the ingress-nginx-controller is deployed as + # a hostNetwork Deployment with 1 pod when RKE2 version is >= 1.21.0-0 + deployment: + enabled: true + replicas: 1 + proxy: + enabled: true + service: + selector: false + + + +## Additional PushProx Monitoring +## ref: https://github.com/rancher/charts/tree/dev-v2.9/packages/rancher-monitoring/rancher-pushprox +## + +# hardenedKubelet can only be deployed if kubelet.enabled=true +# If enabled, it replaces the ServiceMonitor deployed by the default kubelet option with a +# PushProx-based exporter that does not require a host port to be open to scrape metrics. 
+hardenedKubelet: + enabled: false + metricsPort: 10250 + component: kubelet + clients: + port: 10015 + useLocalhost: true + https: + enabled: true + useServiceAccountCredentials: true + insecureSkipVerify: true + rbac: + additionalRules: + - nonResourceURLs: ["/metrics/cadvisor"] + verbs: ["get"] + - apiGroups: [""] + resources: ["nodes/metrics"] + verbs: ["get"] + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + serviceMonitor: + endpoints: + - port: metrics + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - port: metrics + path: /metrics/cadvisor + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - port: metrics + path: /metrics/probes + honorLabels: true + relabelings: + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + +# hardenedNodeExporter can only be deployed if nodeExporter.enabled=true +# If enabled, it replaces the ServiceMonitor deployed by the default nodeExporter with a +# PushProx-based exporter that does not require a host port to be open to scrape metrics. 
+hardenedNodeExporter: + enabled: false + metricsPort: 9796 + component: node-exporter + clients: + port: 10016 + useLocalhost: true + tolerations: + - effect: "NoExecute" + operator: "Exists" + - effect: "NoSchedule" + operator: "Exists" + +## Upgrades +upgrade: + ## Run upgrade scripts before an upgrade or rollback via a Job hook + enabled: true + ## Image to use to run the scripts + image: + repository: rancher/shell + tag: v0.2.1 + +## Rancher Monitoring +## + +rancherMonitoring: + enabled: true + + ## A namespaceSelector to identify the namespace to find the Rancher deployment + ## + namespaceSelector: + matchNames: + - cattle-system + + ## A selector to identify the Rancher deployment + ## If not set, the chart will try to search for the Rancher deployment in the cattle-system namespace and infer the selector values from it + ## If the Rancher deployment does not exist, no resources will be deployed. + ## + selector: {} + +## Component scraping nginx-ingress-controller +## +ingressNginx: + enabled: false + + ## The namespace to search for your nginx-ingress-controller + ## + namespace: ingress-nginx + + service: + port: 9913 + targetPort: 10254 + # selector: + # app: ingress-nginx + serviceMonitor: + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "30s" + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## metric relabel configs to apply to samples before ingestion. + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## relabel configs to apply to targets before scraping. 
+ ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + +# Prometheus Operator Configuration + +## Provide a name in place of kube-prometheus-stack for `app:` labels +## NOTE: If you change this value, you must update the prometheus-adapter.prometheus.url +## +nameOverride: "rancher-monitoring" + +## Override the deployment namespace +## NOTE: If you change this value, you must update the prometheus-adapter.prometheus.url +## +namespaceOverride: "cattle-monitoring-system" + +## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.26.6 +## +kubeTargetVersionOverride: "" + +## Allow kubeVersion to be overridden while creating the ingress +## +kubeVersionOverride: "" + +## Provide a name to substitute for the full names of resources +## +fullnameOverride: "" + +## Labels to apply to all resources +## +commonLabels: {} +# scmhash: abc123 +# myLabel: aakkmd + +## Install Prometheus Operator CRDs +## +crds: + enabled: true + +## custom Rules to override "for" and "severity" in defaultRules +## +customRules: {} + # AlertmanagerFailedReload: + # for: 3m + # AlertmanagerMembersInconsistent: + # for: 5m + # severity: "warning" + +## Create default rules for monitoring the cluster +## +defaultRules: + create: true + rules: + alertmanager: true + etcd: true + configReloaders: true + general: true + k8sContainerCpuUsageSecondsTotal: true + k8sContainerMemoryCache: true + k8sContainerMemoryRss: true + k8sContainerMemorySwap: true + k8sContainerResource: true + k8sContainerMemoryWorkingSetBytes: true + k8sPodOwner: true + kubeApiserverAvailability: true + kubeApiserverBurnrate: true + kubeApiserverHistogram: true + kubeApiserverSlos: true + kubeControllerManager: true + kubelet: true + kubeProxy: true + kubePrometheusGeneral: true + kubePrometheusNodeRecording: true + kubernetesApps: true + kubernetesResources: true + 
kubernetesStorage: true + kubernetesSystem: true + kubeSchedulerAlerting: true + kubeSchedulerRecording: true + kubeStateMetrics: true + network: true + node: true + nodeExporterAlerting: true + nodeExporterRecording: true + prometheus: true + prometheusOperator: true + windows: true + + ## Reduce app namespace alert scope + appNamespacesTarget: ".*" + + ## Set keep_firing_for for all alerts + keepFiringFor: "" + + ## Labels for default rules + labels: {} + ## Annotations for default rules + annotations: {} + + ## Additional labels for PrometheusRule alerts + additionalRuleLabels: {} + + ## Additional annotations for PrometheusRule alerts + additionalRuleAnnotations: {} + + ## Additional labels for specific PrometheusRule alert groups + additionalRuleGroupLabels: + alertmanager: {} + etcd: {} + configReloaders: {} + general: {} + k8sContainerCpuUsageSecondsTotal: {} + k8sContainerMemoryCache: {} + k8sContainerMemoryRss: {} + k8sContainerMemorySwap: {} + k8sContainerResource: {} + k8sPodOwner: {} + kubeApiserverAvailability: {} + kubeApiserverBurnrate: {} + kubeApiserverHistogram: {} + kubeApiserverSlos: {} + kubeControllerManager: {} + kubelet: {} + kubeProxy: {} + kubePrometheusGeneral: {} + kubePrometheusNodeRecording: {} + kubernetesApps: {} + kubernetesResources: {} + kubernetesStorage: {} + kubernetesSystem: {} + kubeSchedulerAlerting: {} + kubeSchedulerRecording: {} + kubeStateMetrics: {} + network: {} + node: {} + nodeExporterAlerting: {} + nodeExporterRecording: {} + prometheus: {} + prometheusOperator: {} + + ## Additional annotations for specific PrometheusRule alerts groups + additionalRuleGroupAnnotations: + alertmanager: {} + etcd: {} + configReloaders: {} + general: {} + k8sContainerCpuUsageSecondsTotal: {} + k8sContainerMemoryCache: {} + k8sContainerMemoryRss: {} + k8sContainerMemorySwap: {} + k8sContainerResource: {} + k8sPodOwner: {} + kubeApiserverAvailability: {} + kubeApiserverBurnrate: {} + kubeApiserverHistogram: {} + kubeApiserverSlos: {} + 
kubeControllerManager: {} + kubelet: {} + kubeProxy: {} + kubePrometheusGeneral: {} + kubePrometheusNodeRecording: {} + kubernetesApps: {} + kubernetesResources: {} + kubernetesStorage: {} + kubernetesSystem: {} + kubeSchedulerAlerting: {} + kubeSchedulerRecording: {} + kubeStateMetrics: {} + network: {} + node: {} + nodeExporterAlerting: {} + nodeExporterRecording: {} + prometheus: {} + prometheusOperator: {} + + additionalAggregationLabels: [] + + ## Prefix for runbook URLs. Use this to override the first part of the runbookURLs that is common to all rules. + runbookUrl: "https://runbooks.prometheus-operator.dev/runbooks" + + ## Disabled PrometheusRule alerts + disabled: {} + # KubeAPIDown: true + # NodeRAIDDegraded: true + +## Deprecated way to provide custom recording or alerting rules to be deployed into the cluster. +## +# additionalPrometheusRules: [] +# - name: my-rule-file +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +## Provide custom recording or alerting rules to be deployed into the cluster. +## +additionalPrometheusRulesMap: {} +# rule-name: +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +## +global: + cattle: + psp: + enabled: false + + systemDefaultRegistry: "" + ## Windows Monitoring + ## ref: https://github.com/rancher/charts/tree/dev-v2.5-source/packages/rancher-windows-exporter + ## + ## Deploys a DaemonSet of Prometheus exporters based on https://github.com/prometheus-community/windows_exporter. + ## Every Windows host must have a wins version of 0.1.0+ to use this chart (default as of Rancher 2.5.8). + ## To upgrade wins versions on Windows hosts, see https://github.com/rancher/wins/tree/master/charts/rancher-wins-upgrader. 
+ ## + windows: + enabled: false + seLinux: + enabled: false + kubectl: + repository: rancher/kubectl + tag: v1.20.2 + pullPolicy: IfNotPresent + rbac: + ## Create RBAC resources for ServiceAccounts and users + ## + create: true + + userRoles: + ## Create default user ClusterRoles to allow users to interact with Prometheus CRs, ConfigMaps, and Secrets + create: true + ## Aggregate default user ClusterRoles into default k8s ClusterRoles + aggregateToDefaultRoles: true + + pspAnnotations: {} + ## Specify pod annotations + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl + ## + # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' + # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' + + ## Global image registry to use if it needs to be overridden for some specific use cases (e.g. local registries, custom images, ...)
+ ## + imageRegistry: docker.io + + ## Reference to one or more secrets to be used when pulling images + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## + imagePullSecrets: [] + # - name: "image-pull-secret" + # or + # - "image-pull-secret" + +windowsMonitoring: + ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') + enabled: false + +## Configuration for prometheus-windows-exporter +## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter +## +prometheus-windows-exporter: + ## Enable ServiceMonitor and set Kubernetes label to use as a job label + ## + prometheus: + monitor: + enabled: true + jobLabel: jobLabel + + releaseLabel: true + + ## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards + ## + podLabels: + jobLabel: windows-exporter + + ## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards + ## + config: |- + collectors: + enabled: '[defaults],memory,container' + +## Configuration for alertmanager +## ref: https://prometheus.io/docs/alerting/alertmanager/ +## +alertmanager: + + ## Deploy alertmanager + ## + enabled: true + + ## Annotations for Alertmanager + ## + annotations: {} + + ## Api that prometheus will use to communicate with alertmanager. Possible values are v1, v2 + ## + apiVersion: v2 + + ## Service account for Alertmanager to use. 
+ ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + automountServiceAccountToken: true + + ## Configure pod disruption budgets for Alertmanager + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget + ## This configuration is immutable once created and will require the PDB to be deleted to be changed + ## https://github.com/kubernetes/kubernetes/issues/45398 + ## + podDisruptionBudget: + enabled: false + minAvailable: 1 + maxUnavailable: "" + + ## Alertmanager configuration directives + ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + config: + global: + resolve_timeout: 5m + inhibit_rules: + - source_matchers: + - 'severity = critical' + target_matchers: + - 'severity =~ warning|info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'severity = warning' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'alertname = InfoInhibitor' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + - target_matchers: + - 'alertname = InfoInhibitor' + route: + group_by: ['namespace'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - receiver: 'null' + matchers: + - alertname = "Watchdog" + receivers: + - name: 'null' + templates: + - '/etc/alertmanager/config/*.tmpl' + + ## Alertmanager configuration directives (as string type, preferred over the config hash map) + ## stringConfig will be used only, if tplConfig is true + ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + stringConfig: "" + + ## Pass the Alertmanager configuration directives through Helm's templating + ## engine. 
If the Alertmanager configuration contains Alertmanager templates, + ## they'll need to be properly escaped so that they are not interpreted by + ## Helm + ## ref: https://helm.sh/docs/developing_charts/#using-the-tpl-function + ## https://prometheus.io/docs/alerting/configuration/#tmpl_string + ## https://prometheus.io/docs/alerting/notifications/ + ## https://prometheus.io/docs/alerting/notification_examples/ + tplConfig: false + + ## Alertmanager template files to format alerts + ## By default, templateFiles are placed in /etc/alertmanager/config/ and if + ## they have a .tmpl file suffix will be loaded. See config.templates above + ## to change, add other suffixes. If adding other suffixes, be sure to update + ## config.templates above to include those suffixes. + ## ref: https://prometheus.io/docs/alerting/notifications/ + ## https://prometheus.io/docs/alerting/notification_examples/ + ## + + templateFiles: + rancher_defaults.tmpl: |- + {{- define "slack.rancher.text" -}} + {{ template "rancher.text_multiple" . }} + {{- end -}} + + {{- define "rancher.text_multiple" -}} + *[GROUP - Details]* + One or more alarms in this group have triggered a notification. + + {{- if gt (len .GroupLabels.Values) 0 }} + *Group Labels:* + {{- range .GroupLabels.SortedPairs }} + • *{{ .Name }}:* `{{ .Value }}` + {{- end }} + {{- end }} + {{- if .ExternalURL }} + *Link to AlertManager:* {{ .ExternalURL }} + {{- end }} + + {{- range .Alerts }} + {{ template "rancher.text_single" . 
}} + {{- end }} + {{- end -}} + + {{- define "rancher.text_single" -}} + {{- if .Labels.alertname }} + *[ALERT - {{ .Labels.alertname }}]* + {{- else }} + *[ALERT]* + {{- end }} + {{- if .Labels.severity }} + *Severity:* `{{ .Labels.severity }}` + {{- end }} + {{- if .Labels.cluster }} + *Cluster:* {{ .Labels.cluster }} + {{- end }} + {{- if .Annotations.summary }} + *Summary:* {{ .Annotations.summary }} + {{- end }} + {{- if .Annotations.message }} + *Message:* {{ .Annotations.message }} + {{- end }} + {{- if .Annotations.description }} + *Description:* {{ .Annotations.description }} + {{- end }} + {{- if .Annotations.runbook_url }} + *Runbook URL:* <{{ .Annotations.runbook_url }}|:spiral_note_pad:> + {{- end }} + {{- with .Labels }} + {{- with .Remove (stringSlice "alertname" "severity" "cluster") }} + {{- if gt (len .) 0 }} + *Additional Labels:* + {{- range .SortedPairs }} + • *{{ .Name }}:* `{{ .Value }}` + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Annotations }} + {{- with .Remove (stringSlice "summary" "message" "description" "runbook_url") }} + {{- if gt (len .) 0 }} + *Additional Annotations:* + {{- range .SortedPairs }} + • *{{ .Name }}:* `{{ .Value }}` + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end -}} + + ingress: + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + annotations: {} + + labels: {} + + ## Override ingress to a different defined port on the service + # servicePort: 8081 + ## Override ingress to a different service then the default, this is useful if you need to + ## point to a specific instance of the alertmanager (eg kube-prometheus-stack-alertmanager-0) + # serviceName: kube-prometheus-stack-alertmanager-0 + + ## Hosts must be provided if Ingress is enabled. 
+ ## + hosts: [] + # - alertmanager.domain.com + + ## Paths to use for ingress rules - one path should match the alertmanagerSpec.routePrefix + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Alertmanager Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: alertmanager-general-tls + # hosts: + # - alertmanager.example.com + + ## Configuration for Alertmanager secret + ## + secret: + annotations: {} + + # by default the alertmanager secret is not overwritten if it already exists + recreateIfExists: false + + ## Configuration for creating an Ingress that will map to each Alertmanager replica service + ## alertmanager.servicePerReplica must be enabled + ## + ingressPerReplica: + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + annotations: {} + labels: {} + + ## Final form of the hostname for each per replica ingress is + ## {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }} + ## + ## Prefix for the per replica ingress that will have `-$replicaNumber` + ## appended to the end + hostPrefix: "" + ## Domain that will be used for the per replica ingress + hostDomain: "" + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # 
pathType: ImplementationSpecific + + ## Secret name containing the TLS certificate for alertmanager per replica ingress + ## Secret must be manually created in the namespace + tlsSecretName: "" + + ## Separated secret for each per replica Ingress. Can be used together with cert-manager + ## + tlsSecretPerReplica: + enabled: false + ## Final form of the secret for each per replica ingress is + ## {{ tlsSecretPerReplica.prefix }}-{{ $replicaNumber }} + ## + prefix: "alertmanager" + + ## Configuration for Alertmanager service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + + ## Port for Alertmanager Service to listen on + ## + port: 9093 + ## To be used with a proxy extraContainer port + ## + targetPort: 9093 + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30903 + ## List of IP addresses at which the Alertmanager service is available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + + ## Additional ports to open for Alertmanager service + ## + additionalPorts: [] + # - name: oauth-proxy + # port: 8081 + # targetPort: 8081 + # - name: oauth-metrics + # port: 8082 + # targetPort: 8082 + + externalIPs: [] + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## If you want to make sure that connections from a particular client are passed to the same Pod each time + ## Accepts 'ClientIP' or 'None' + ## + sessionAffinity: None + + ## If you want to modify the ClientIP sessionAffinity timeout + ## The value must be >0 && <=86400 (1 day) if sessionAffinity == "ClientIP" + ## + sessionAffinityConfig: + clientIP: + timeoutSeconds: 10800 + + ## Service type + ## + type: ClusterIP + + ## Configuration for creating a separate Service for each statefulset Alertmanager replica + ## + servicePerReplica: + enabled: false + annotations: {} + +
## Port for Alertmanager Service per replica to listen on + ## + port: 9093 + + ## To be used with a proxy extraContainer port + targetPort: 9093 + + ## Port to expose on each node + ## Only used if servicePerReplica.type is 'NodePort' + ## + nodePort: 30904 + + ## Loadbalancer source IP ranges + ## Only used if servicePerReplica.type is "LoadBalancer" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Configuration for creating a ServiceMonitor for AlertManager + ## + serviceMonitor: + ## If true, a ServiceMonitor will be created for the AlertManager service. + ## + selfMonitor: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## enableHttp2: Whether to enable HTTP2. 
+ ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint + enableHttp2: true + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional Endpoints + ## + additionalEndpoints: [] + # - port: oauth-metrics + # path: /metrics + + ## Settings affecting alertmanagerSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerspec + ## + alertmanagerSpec: + ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Metadata Labels and Annotations gets propagated to the Alertmanager pods. 
+ ## + podMetadata: {} + + ## Image of Alertmanager + ## + image: + repository: rancher/mirrored-prometheus-alertmanager + tag: v0.27.0 + sha: "" + + ## If true then the user will be responsible to provide a secret with alertmanager configuration + ## So when true the config part will be ignored (including templateFiles) and the one in the secret will be used + ## + useExistingSecret: false + + ## Secrets is a list of Secrets in the same namespace as the Alertmanager object, which shall be mounted into the + ## Alertmanager Pods. The Secrets are mounted into /etc/alertmanager/secrets/. + ## + secrets: [] + + ## If false then the user will opt out of automounting API credentials. + ## + automountServiceAccountToken: true + + ## ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. + ## The ConfigMaps are mounted into /etc/alertmanager/configmaps/. + ## + configMaps: [] + + ## ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains configuration for + ## this Alertmanager instance. Defaults to 'alertmanager-' followed by the Alertmanager object's name. The secret is mounted into /etc/alertmanager/config. + ## + # configSecret: + + ## WebTLSConfig defines the TLS parameters for HTTPS + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerwebspec + web: {} + + ## AlertmanagerConfigs to be selected to merge and configure Alertmanager with.
+ ## + alertmanagerConfigSelector: {} + ## Example which selects all alertmanagerConfig resources + ## with label "alertconfig" with values any of "example-config" or "example-config-2" + # alertmanagerConfigSelector: + # matchExpressions: + # - key: alertconfig + # operator: In + # values: + # - example-config + # - example-config-2 + # + ## Example which selects all alertmanagerConfig resources with label "role" set to "example-config" + # alertmanagerConfigSelector: + # matchLabels: + # role: example-config + + ## Namespaces to be selected for AlertmanagerConfig discovery. If nil, only check own namespace. + ## + alertmanagerConfigNamespaceSelector: {} + ## Example which selects all namespaces + ## with label "alertmanagerconfig" with values any of "example-namespace" or "example-namespace-2" + # alertmanagerConfigNamespaceSelector: + # matchExpressions: + # - key: alertmanagerconfig + # operator: In + # values: + # - example-namespace + # - example-namespace-2 + + ## Example which selects all namespaces with label "alertmanagerconfig" set to "enabled" + # alertmanagerConfigNamespaceSelector: + # matchLabels: + # alertmanagerconfig: enabled + + ## AlertmanagerConfig to be used as top level configuration + ## + alertmanagerConfiguration: {} + ## Example which selects a global AlertmanagerConfig + # alertmanagerConfiguration: + # name: global-alertmanager-Configuration + + ## Defines the strategy used by AlertmanagerConfig objects to match alerts. e.g.: + ## + alertmanagerConfigMatcherStrategy: {} + ## Example which uses the OnNamespace strategy + # alertmanagerConfigMatcherStrategy: + # type: OnNamespace + + ## Define Log Format + # Use logfmt (default) or json logging + logFormat: logfmt + + ## Log level for Alertmanager to be configured with. + ## + logLevel: info + + ## Size is the expected size of the alertmanager cluster. The controller will eventually make the size of the + ## running cluster equal to the expected size.
+ replicas: 1 + + ## Time duration Alertmanager shall retain data for. Default is '120h', and must match the regular expression + ## [0-9]+(ms|s|m|h) (milliseconds seconds minutes hours). + ## + retention: 120h + + ## Storage is the definition of how storage will be used by the Alertmanager instances. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md + ## + storage: {} + # volumeClaimTemplate: + # spec: + # storageClassName: gluster + # accessModes: ["ReadWriteOnce"] + # resources: + # requests: + # storage: 50Gi + # selector: {} + + + ## The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. string false + ## + externalUrl: + + ## The route prefix Alertmanager registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true, + ## but the server serves requests under a different route prefix. For example for use with kubectl proxy. + ## + routePrefix: / + + ## scheme: HTTP scheme to use. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when connect to the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig + tlsConfig: {} + + ## If set to true all actions on the underlying managed objects are not going to be performed, except for delete actions. + ## + paused: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Define resources requests and limits for single Pods. 
+ ## ref: https://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + memory: 500Mi + cpu: 1000m + requests: + memory: 100Mi + cpu: 100m + + ## Pod anti-affinity can prevent the scheduler from placing Alertmanager replicas on the same node. + ## The value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. + ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. + ## The value "" (the default set here) will disable pod anti-affinity so that no anti-affinity rules will be configured. + ## + podAntiAffinity: "" + + ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. + ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone + ## + podAntiAffinityTopologyKey: kubernetes.io/hostname + + ## Assign custom affinity rules to the alertmanager instance + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + + ## If specified, the pod's tolerations. + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## If specified, the pod's topology spread constraints. + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ + ## + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app: alertmanager + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 1000 and gid 2000.
*v1.PodSecurityContext false + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + ## ListenLocal makes the Alertmanager server listen on loopback, so that it does not bind against the Pod IP. + ## Note this is only for the Alertmanager UI, not the gossip communication. + ## + listenLocal: false + + ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager pod. + ## + containers: [] + # containers: + # - name: oauth-proxy + # image: quay.io/oauth2-proxy/oauth2-proxy:v7.5.1 + # args: + # - --upstream=http://127.0.0.1:9093 + # - --http-address=0.0.0.0:8081 + # - --metrics-address=0.0.0.0:8082 + # - ... + # ports: + # - containerPort: 8081 + # name: oauth-proxy + # protocol: TCP + # - containerPort: 8082 + # name: oauth-metrics + # protocol: TCP + # resources: {} + + # Additional volumes on the output StatefulSet definition. + volumes: [] + + # Additional VolumeMounts on the output StatefulSet definition. + volumeMounts: [] + + ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes + ## (permissions, dir tree) on mounted volumes before starting Alertmanager + initContainers: [] + + ## Priority class assigned to the Pods + ## + priorityClassName: "" + + ## AdditionalPeers allows injecting a set of additional Alertmanagers to peer with to form a highly available cluster. + ## + additionalPeers: [] + + ## PortName to use for Alert Manager. + ## + portName: "http-web" + + ## ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918 + ## + clusterAdvertiseAddress: false + + ## clusterGossipInterval determines interval between gossip attempts.
+ ## Needs to be specified as GoDuration, a time duration that can be parsed by Go’s time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterGossipInterval: "" + + ## clusterPeerTimeout determines timeout for cluster peering. + ## Needs to be specified as GoDuration, a time duration that can be parsed by Go’s time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterPeerTimeout: "" + + ## clusterPushpullInterval determines interval between pushpull attempts. + ## Needs to be specified as GoDuration, a time duration that can be parsed by Go’s time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterPushpullInterval: "" + + ## ForceEnableClusterMode ensures Alertmanager does not deactivate the cluster mode when running with a single replica. + ## Use case is e.g. spanning an Alertmanager cluster across Kubernetes clusters with a single replica in each. + forceEnableClusterMode: false + + ## Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to + ## be considered available. Defaults to 0 (pod will be considered available as soon as it is ready). + minReadySeconds: 0 + + ## Additional configuration which is not covered by the properties above. (passed through tpl) + additionalConfig: {} + + ## Additional configuration which is not covered by the properties above. + ## Useful, if you need advanced templating inside alertmanagerSpec. + ## Otherwise, use alertmanager.alertmanagerSpec.additionalConfig (passed through tpl) + additionalConfigString: "" + + ## ExtraSecret can be used to store various data in an extra secret + ## (use it for example to store hashed basic auth credentials) + extraSecret: + ## if not set, name will be auto generated + # name: "" + annotations: {} + data: {} + # auth: | + # foo:$apr1$OFG3Xybp$ckL0FHDAkoXYIlH9.cysT0 + # someoneelse:$apr1$DMZX2Z4q$6SbQIfyuLQd.xmo/P0m2c. 
+ +## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml +## +grafana: + enabled: true + namespaceOverride: "" + + ## Grafana's primary configuration + ## NOTE: values in map will be converted to ini format + ## ref: http://docs.grafana.org/installation/configuration/ + ## + grafana.ini: + users: + auto_assign_org_role: Viewer + auth: + disable_login_form: false + auth.anonymous: + enabled: true + org_role: Viewer + auth.basic: + enabled: false + dashboards: + # Modify this value to change the default dashboard shown on the main Grafana page + default_home_dashboard_path: /tmp/dashboards/rancher-default-home.json + security: + # Required to embed dashboards in Rancher Cluster Overview Dashboard on Cluster Explorer + allow_embedding: true + + deploymentStrategy: + type: Recreate + + ## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled + ## + forceDeployDatasources: false + + ## ForceDeployDashboard Create dashboard configmap even if grafana deployment has been disabled + ## + forceDeployDashboards: false + + ## Deploy default dashboards + ## + defaultDashboardsEnabled: true + + # Additional options for defaultDashboards + defaultDashboards: + # The default namespace to place defaultDashboards within + namespace: cattle-dashboards + # Whether to create the default namespace as a Helm managed namespace or use an existing namespace + # If false, the defaultDashboards.namespace will be created as a Helm managed namespace + useExistingNamespace: false + # Whether the Helm managed namespace created by this chart should be left behind on a Helm uninstall + # If you place other dashboards in this namespace, then they will be deleted on a helm uninstall + # Ignore if useExistingNamespace is true + cleanupOnUninstall: false + + ## Timezone for the default dashboards + ## Other options are: browser or a specific timezone, i.e. 
Europe/Luxembourg + ## + defaultDashboardsTimezone: utc + + ## Editable flag for the default dashboards + ## + defaultDashboardsEditable: true + + adminPassword: prom-operator + + ingress: + ## If true, Grafana Ingress will be created + ## + enabled: false + + ## IngressClassName for Grafana Ingress. + ## Should be provided if Ingress is enabled. + ## + # ingressClassName: nginx + + ## Annotations for Grafana Ingress + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + ## Labels to be added to the Ingress + ## + labels: {} + + ## Hostnames. + ## Must be provided if Ingress is enabled. + ## + # hosts: + # - grafana.domain.com + hosts: [] + + ## Path for grafana ingress + path: / + + ## TLS configuration for grafana Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: grafana-general-tls + # hosts: + # - grafana.example.com + + # # To make Grafana persistent (Using Statefulset) + # # + # persistence: + # enabled: true + # type: sts + # storageClassName: "storageClassName" + # accessModes: + # - ReadWriteOnce + # size: 20Gi + # finalizers: + # - kubernetes.io/pvc-protection + + serviceAccount: + create: true + autoMount: true + + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + searchNamespace: cattle-dashboards + labelValue: "1" + + # Support for new table panels, when enabled grafana auto migrates the old table panels to newer table panels + enableNewTablePanelSyntax: false + + ## Annotations for Grafana dashboard configmaps + ## + annotations: {} + multicluster: + global: + enabled: false + etcd: + enabled: false + provider: + allowUiUpdates: false + datasources: + enabled: true + defaultDatasourceEnabled: true + isDefaultDatasource: true + + uid: prometheus + + ## URL of prometheus datasource + ## + # url: http://prometheus-stack-prometheus:9090/ + + ## Prometheus request timeout in seconds + # timeout: 30 + + # If not defined, will use
prometheus.prometheusSpec.scrapeInterval or its default + # defaultDatasourceScrapeInterval: 15s + + ## Annotations for Grafana datasource configmaps + ## + annotations: {} + + ## Set method for HTTP to send query to datasource + httpMethod: POST + + ## Create datasource for each Pod of Prometheus StatefulSet; + ## this uses headless service `prometheus-operated` which is + ## created by Prometheus Operator + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/0fee93e12dc7c2ea1218f19ae25ec6b893460590/pkg/prometheus/statefulset.go#L255-L286 + createPrometheusReplicasDatasources: false + label: grafana_datasource + labelValue: "1" + + ## Field with internal link pointing to existing data source in Grafana. + ## Can be provisioned via additionalDataSources + exemplarTraceIdDestinations: {} + # datasourceUid: Jaeger + # traceIdLabelName: trace_id + alertmanager: + enabled: true + uid: alertmanager + handleGrafanaManagedAlerts: false + implementation: prometheus + + extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /etc/grafana/ssl/ + # configMap: certs-configmap + # readOnly: true + + deleteDatasources: [] + # - name: example-datasource + # orgId: 1 + + ## Configure additional grafana datasources (passed through tpl) + ## ref: http://docs.grafana.org/administration/provisioning/#datasources + additionalDataSources: [] + # - name: prometheus-sample + # access: proxy + # basicAuth: true + # basicAuthPassword: pass + # basicAuthUser: daco + # editable: false + # jsonData: + # tlsSkipVerify: true + # orgId: 1 + # type: prometheus + # url: https://{{ printf "%s-prometheus.svc" .Release.Name }}:9090 + # version: 1 + + ## Passed to grafana subchart and used by servicemonitor below + ## + service: + portName: nginx-http + ## Port for Grafana Service to listen on + ## + port: 80 + ## To be used with a proxy extraContainer port + ## + targetPort: 8080 + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + 
nodePort: 30950 + ## Service type + ## + type: ClusterIP + + proxy: + image: + repository: rancher/mirrored-library-nginx + tag: 1.24.0-alpine + + ## Specify additional containers in extraContainers. This is meant to allow adding an authentication proxy to a grafana pod + extraContainers: | + - name: grafana-proxy + args: + - nginx + - -g + - daemon off; + - -c + - /nginx/nginx.conf + image: "{{ template "system_default_registry" . }}{{ .Values.proxy.image.repository }}:{{ .Values.proxy.image.tag }}" + ports: + - containerPort: 8080 + name: nginx-http + protocol: TCP + volumeMounts: + - mountPath: /nginx + name: grafana-nginx + - mountPath: /var/cache/nginx + name: nginx-home + securityContext: + runAsUser: 101 + runAsGroup: 101 + + ## Volumes that can be used in containers + extraContainerVolumes: + - name: nginx-home + emptyDir: {} + - name: grafana-nginx + configMap: + name: grafana-nginx-proxy-config + items: + - key: nginx.conf + mode: 438 + path: nginx.conf + + ## If true, create a serviceMonitor for grafana + ## + serviceMonitor: + # If true, a ServiceMonitor CRD is created for a prometheus operator + # https://github.com/coreos/prometheus-operator + # + enabled: true + + # Path to use for scraping metrics. Might be different if server.root_url is set + # in grafana.ini + path: "/metrics" + + # namespace: monitoring (defaults to use the namespace this chart is deployed to) + + # labels for the ServiceMonitor + labels: {} + + # Scrape interval. If not set, the Prometheus default scrape interval is used.
+ # + interval: "" + scheme: http + tlsConfig: {} + scrapeTimeout: 30s + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + resources: + limits: + memory: 200Mi + cpu: 200m + requests: + memory: 100Mi + cpu: 100m + + testFramework: + enabled: false + +## Flag to disable all the kubernetes component scrapers +## +kubernetesServiceMonitors: + enabled: true + +## Component scraping the kube api server +## +kubeApiServer: + enabled: true + tlsConfig: + serverName: kubernetes + insecureSkipVerify: false + serviceMonitor: + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + jobLabel: component + selector: + matchLabels: + component: apiserver + provider: kubernetes + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: + # Drop excessively noisy apiserver buckets. + - action: drop + regex: apiserver_request_duration_seconds_bucket;(0.15|0.2|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2|3|3.5|4|4.5|6|7|8|9|15|25|40|50) + sourceLabels: + - __name__ + - le + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: + # - __meta_kubernetes_namespace + # - __meta_kubernetes_service_name + # - __meta_kubernetes_endpoint_port_name + # action: keep + # regex: default;kubernetes;https + # - targetLabel: __address__ + # replacement: kubernetes.default.svc:443 + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping the kubelet and kubelet-hosted cAdvisor +## +kubelet: + enabled: true + namespace: kube-system + + serviceMonitor: + ## Attach metadata to discovered targets. Requires Prometheus v2.45 for endpoints created by the operator. + ## + attachMetadata: + node: false + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## If true, Prometheus use (respect) labels provided by exporter. + ## + honorLabels: true + + ## If true, Prometheus ingests metrics with timestamp provided by exporter. If false, Prometheus ingests metrics with timestamp of scrape. + ## + honorTimestamps: true + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. 
Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## Enable scraping the kubelet over https. For requirements to enable this see + ## https://github.com/prometheus-operator/prometheus-operator/issues/926 + ## + https: true + + ## Enable scraping /metrics/cadvisor from kubelet's service + ## + cAdvisor: true + + ## Enable scraping /metrics/probes from kubelet's service + ## + probes: true + + ## Enable scraping /metrics/resource from kubelet's service + ## This is disabled by default because container metrics are already exposed by cAdvisor + ## + resource: false + # From kubernetes 1.18, /metrics/resource/v1alpha1 renamed to /metrics/resource + resourcePath: "/metrics/resource/v1alpha1" + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + cAdvisorMetricRelabelings: + # Drop less useful container CPU metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)' + # Drop less useful container / always zero filesystem metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)' + # Drop less useful / always zero container memory metrics. 
+ - sourceLabels: [__name__] + action: drop + regex: 'container_memory_(mapped_file|swap)' + # Drop less useful container process metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_(file_descriptors|tasks_state|threads_max)' + # Drop container spec metrics that overlap with kube-state-metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_spec.*' + # Drop cgroup metrics with no pod. + - sourceLabels: [id, pod] + action: drop + regex: '.+;' + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + probesMetricRelabelings: [] + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + ## metrics_path is required to match upstream rules and charts + cAdvisorRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## RelabelConfigs to apply to samples before scraping + ## ref: 
https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + probesRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + resourceRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + ## metrics_path is required to match upstream rules and charts + relabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping the kube controller manager +## +kubeControllerManager: + 
enabled: false + + ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeControllerManager.endpoints only the port and targetPort are used + ## + service: + enabled: true + ## If null or unset, the value is determined dynamically based on target Kubernetes version due to change + ## of default port in Kubernetes 1.22. + ## + port: null + targetPort: null + # selector: + # component: kube-controller-manager + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: kube-controller-manager + + ## Enable scraping kube-controller-manager over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks. + ## If null or unset, the value is determined dynamically based on target Kubernetes version. 
+ ## + https: null + + # Skip TLS certificate validation when scraping + insecureSkipVerify: null + + # Name of the server to use when validating TLS certificate + serverName: null + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping coreDns. Use either this or kubeDns +## +coreDns: + enabled: true + service: + enabled: true + port: 9153 + targetPort: 9153 + # selector: + # k8s-app: kube-dns + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-dns + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping kubeDns. Use either this or coreDns +## +kubeDns: + enabled: false + service: + dnsmasq: + port: 10054 + targetPort: 10054 + skydns: + port: 10055 + targetPort: 10055 + # selector: + # k8s-app: kube-dns + serviceMonitor: + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-dns + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + dnsmasqMetricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + dnsmasqRelabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping etcd +## +kubeEtcd: + enabled: false + + ## If your etcd is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used + ## + service: + enabled: true + port: 2381 + targetPort: 2381 + # selector: + # component: etcd + + ## Configure secure access to the etcd cluster by loading a secret into prometheus and + ## specifying security configuration below. For example, with a secret named etcd-client-cert + ## + ## serviceMonitor: + ## scheme: https + ## insecureSkipVerify: false + ## serverName: localhost + ## caFile: /etc/prometheus/secrets/etcd-client-cert/etcd-ca + ## certFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client + ## keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key + ## + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. 
+ ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + scheme: http + insecureSkipVerify: false + serverName: "" + caFile: "" + certFile: "" + keyFile: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: etcd + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping kube scheduler +## +kubeScheduler: + enabled: false + + ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeScheduler.endpoints only the port and targetPort are used + ## + service: + enabled: true + ## If null or 
unset, the value is determined dynamically based on target Kubernetes version due to change + ## of default port in Kubernetes 1.23. + ## + port: null + targetPort: null + # selector: + # component: kube-scheduler + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Enable scraping kube-scheduler over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks. + ## If null or unset, the value is determined dynamically based on target Kubernetes version. + ## + https: null + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: kube-scheduler + + ## Skip TLS certificate validation when scraping + insecureSkipVerify: null + + ## Name of the server to use when validating TLS certificate + serverName: null + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping kube proxy +## +kubeProxy: + enabled: false + + ## If your kube proxy is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + service: + enabled: true + port: 10249 + targetPort: 10249 + # selector: + # k8s-app: kube-proxy + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. 
+ ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-proxy + + ## Enable scraping kube-proxy over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks + ## + https: false + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + +## Component scraping kube state metrics +## +kubeStateMetrics: + enabled: true + +## Configuration for kube-state-metrics subchart +## +kube-state-metrics: + namespaceOverride: "" + rbac: + create: true + releaseLabel: true + prometheus: + monitor: + enabled: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## Scrape Timeout. If not set, the Prometheus default scrape timeout is used. + ## + scrapeTimeout: "" + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + # Keep labels from scraped data, overriding server-side labels + ## + honorLabels: true + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + selfMonitor: + enabled: false + +## Deploy node exporter as a daemonset to all nodes +## +nodeExporter: + enabled: true + operatingSystems: + linux: + enabled: true + darwin: + enabled: true + + ## ForceDeployDashboard Create dashboard configmap even if nodeExporter deployment has been disabled + ## + forceDeployDashboards: false + +## Configuration for prometheus-node-exporter subchart +## +prometheus-node-exporter: + namespaceOverride: "" + podLabels: + ## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards + ## + jobLabel: node-exporter + releaseLabel: true + extraArgs: + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - 
--collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ + service: + portName: http-metrics + prometheus: + monitor: + enabled: true + + jobLabel: jobLabel + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on the length of label names that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on the length of label values that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## How long until a scrape request times out. If not set, the Prometheus default scrape timeout is used. + ## + scrapeTimeout: "" + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + metricRelabelings: [] + # - sourceLabels: [__name__] + # separator: ; + # regex: ^node_mountstats_nfs_(event|operations|transport)_.+ + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + +## Manages Prometheus and Alertmanager components +## +prometheusOperator: + enabled: true + + ## Use '{{ template "kube-prometheus-stack.fullname" . }}-operator' by default + fullnameOverride: "" + + ## Number of old replicasets to retain ## + ## The default value is 10, 0 will garbage-collect old replicasets ## + revisionHistoryLimit: 10 + + ## Strategy of the deployment + ## + strategy: {} + + ## Prometheus-Operator v0.39.0 and later support TLS natively. + ## + tls: + enabled: true + # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants + tlsMinVersion: VersionTLS13 + # Users who are deploying this chart in GKE private clusters will need to add firewall rules to expose this port for admissions webhooks + internalPort: 8443 + + ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted + ## rules from making their way into prometheus and potentially preventing the container from starting + admissionWebhooks: + ## Valid values: Fail, Ignore, IgnoreOnInstallOnly + ## IgnoreOnInstallOnly - If Release.IsInstall returns "true", set "Ignore" otherwise "Fail" + failurePolicy: "" + ## The default timeoutSeconds is 10 and the maximum value is 30. 
+ timeoutSeconds: 10 + enabled: true + ## A PEM encoded CA bundle which will be used to validate the webhook's server certificate. + ## If unspecified, system trust roots on the apiserver are used. + caBundle: "" + ## If enabled, generate a self-signed certificate, then patch the webhook configurations with the generated data. + ## On chart upgrades (or if the secret exists) the cert will not be re-generated. You can use this to provide your own + ## certs ahead of time if you wish. + ## + annotations: {} + # argocd.argoproj.io/hook: PreSync + # argocd.argoproj.io/hook-delete-policy: HookSucceeded + + namespaceSelector: {} + + deployment: + enabled: false + + ## Number of replicas + ## + replicas: 1 + + ## Strategy of the deployment + ## + strategy: {} + + # Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ + podDisruptionBudget: {} + # maxUnavailable: 1 + # minAvailable: 1 + + ## Number of old replicasets to retain ## + ## The default value is 10, 0 will garbage-collect old replicasets ## + revisionHistoryLimit: 10 + + ## Prometheus-Operator v0.39.0 and later support TLS natively. + ## + tls: + enabled: true + # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants + tlsMinVersion: VersionTLS13 + # The default webhook port is 10250 in order to work out-of-the-box in GKE private clusters and avoid adding firewall rules. + internalPort: 10250 + + ## Service account for Prometheus Operator Webhook to use. 
+ ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + automountServiceAccountToken: false + create: true + name: "" + + ## Configuration for Prometheus operator Webhook service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 31080 + + nodePortTls: 31443 + + ## Additional ports to open for Prometheus operator Webhook service + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services + ## + additionalPorts: [] + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + ## + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## NodePort, ClusterIP, LoadBalancer + ## + type: ClusterIP + + ## List of IP addresses at which the Prometheus operator Webhook service is available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + externalIPs: [] + + ## Labels to add to the operator webhook deployment + ## + labels: {} + + ## Annotations to add to the operator webhook deployment + ## + annotations: {} + + ## Labels to add to the operator webhook pod + ## + podLabels: {} + + ## Annotations to add to the operator webhook pod + ## + podAnnotations: {} + + ## Assign a PriorityClassName to pods if set + # priorityClassName: "" + + ## Define Log Format + # Use logfmt (default) or json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + ## Prometheus-operator webhook image + ## + image: + registry: quay.io + repository: rancher/mirrored-prometheus-operator-admission-webhook + # if not set appVersion field from Chart.yaml is used + tag: v0.72.0 + sha: "" + pullPolicy: IfNotPresent + + ## Define Log Format + # Use logfmt (default) or 
json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + + ## Liveness probe + ## + livenessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + + ## Readiness probe + ## + readinessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + + ## Resource limits & requests + ## + resources: {} + # limits: + # cpu: 200m + # memory: 200Mi + # requests: + # cpu: 100m + # memory: 100Mi + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## Assign custom affinity rules to the prometheus operator + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + + ## Container-specific security context configuration + ## ref: 
https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + ## If false then the user will opt out of automounting API credentials. + ## + automountServiceAccountToken: true + + patch: + enabled: true + image: + repository: rancher/mirrored-ingress-nginx-kube-webhook-certgen + tag: v20221220-controller-v1.5.1-58-g787ea74b6 + sha: "" + pullPolicy: IfNotPresent + resources: {} + ## Provide a priority class name to the webhook patching job + ## + priorityClassName: "" + annotations: {} + # argocd.argoproj.io/hook: PreSync + # argocd.argoproj.io/hook-delete-policy: HookSucceeded + podAnnotations: {} + nodeSelector: {} + affinity: {} + tolerations: [] + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 2000 and gid 2000. *v1.PodSecurityContext false + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 2000 + seccompProfile: + type: RuntimeDefault + + # Security context for create job container + createSecretJob: + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Security context for patch job container + patchWebhookJob: + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Use certmanager to generate webhook certs + certManager: + enabled: false + # self-signed root certificate + rootCert: + duration: "" # default to be 5y + admissionCert: + duration: "" # default to be 1y + # issuerRef: + # name: "issuer" + # kind: "ClusterIssuer" + + ## Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list). + ## This is mutually exclusive with denyNamespaces. 
Setting this to an empty object will disable the configuration + ## + namespaces: {} + # releaseNamespace: true + # additional: + # - kube-system + + ## Namespaces not to scope the interaction of the Prometheus Operator (deny list). + ## + denyNamespaces: [] + + ## Filter namespaces to look for prometheus-operator custom resources + ## + alertmanagerInstanceNamespaces: [] + alertmanagerConfigNamespaces: [] + prometheusInstanceNamespaces: [] + thanosRulerInstanceNamespaces: [] + + ## The clusterDomain value will be added to the cluster.peer option of the alertmanager. + ## Without this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated:9094 (default value) + ## With this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated.namespace.svc.cluster-domain:9094 + ## + # clusterDomain: "cluster.local" + + networkPolicy: + ## Enable creation of NetworkPolicy resources. + ## + enabled: false + + ## Flavor of the network policy to use. + # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + # cilium: + # egress: + + ## match labels used in selector + # matchLabels: {} + + ## Service account for Prometheus Operator to use. 
+ ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + automountServiceAccountToken: true + + ## Configuration for Prometheus operator service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30080 + + nodePortTls: 30443 + + ## Additional ports to open for Prometheus operator service + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services + ## + additionalPorts: [] + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + ## + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## NodePort, ClusterIP, LoadBalancer + ## + type: ClusterIP + + ## List of IP addresses at which the Prometheus operator service is available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + externalIPs: [] + + ## Labels to add to the operator deployment + ## + labels: {} + + ## Annotations to add to the operator deployment + ## + annotations: {} + + ## Labels to add to the operator pod + ## + podLabels: {} + + ## Annotations to add to the operator pod + ## + podAnnotations: {} + + ## Assign a PriorityClassName to pods if set + # priorityClassName: "" + + ## Define Log Format + # Use logfmt (default) or json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + kubeletService: + ## If true, the operator will create and maintain a service for scraping kubelets + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/helm/prometheus-operator/README.md + ## + enabled: true + namespace: kube-system + ## Use '{{ template "kube-prometheus-stack.fullname" . 
}}-kubelet' by default + name: "" + + ## Create a servicemonitor for the operator + ## + serviceMonitor: + ## If true, create a serviceMonitor for prometheus operator + ## + selfMonitor: true + + ## Labels for ServiceMonitor + additionalLabels: {} + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## Scrape timeout. If not set, the Prometheus default scrape timeout is used. + scrapeTimeout: "" + + ## Metric relabel configs to apply to samples before ingestion. + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + # relabel configs to apply to samples before ingestion. 
+ ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Resource limits & requests + ## + resources: + limits: + cpu: 200m + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi + + ## Operator Environment + ## env: + ## VARIABLE: value + env: + GOGC: "30" + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## Assign custom affinity rules to the prometheus operator + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + + ## Container-specific security context configuration + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Enable 
vertical pod autoscaler support for prometheus-operator + verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". 
+ updateMode: Auto + + ## Prometheus-operator image + ## + image: + repository: rancher/mirrored-prometheus-operator-prometheus-operator + tag: v0.72.0 + sha: "" + pullPolicy: IfNotPresent + + ## Prometheus image to use for prometheuses managed by the operator + ## + # prometheusDefaultBaseImage: prometheus/prometheus + + ## Prometheus image registry to use for prometheuses managed by the operator + ## + # prometheusDefaultBaseImageRegistry: quay.io + + ## Alertmanager image to use for alertmanagers managed by the operator + ## + # alertmanagerDefaultBaseImage: prometheus/alertmanager + + ## Alertmanager image registry to use for alertmanagers managed by the operator + ## + # alertmanagerDefaultBaseImageRegistry: quay.io + + ## Prometheus-config-reloader + ## + prometheusConfigReloader: + image: + repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader + tag: v0.72.0 + sha: "" + + # add prometheus config reloader liveness and readiness probe. Default: false + enableProbe: false + + # resource config for prometheusConfigReloader + resources: {} + # requests: + # cpu: 200m + # memory: 50Mi + # limits: + # cpu: 200m + # memory: 50Mi + + ## Thanos side-car image when configured + ## + thanosImage: + repository: rancher/mirrored-thanos-thanos + tag: v0.34.1 + sha: "" + + ## Set a Label Selector to filter watched prometheus and prometheusAgent + ## + prometheusInstanceSelector: "" + + ## Set a Label Selector to filter watched alertmanager + ## + alertmanagerInstanceSelector: "" + + ## Set a Label Selector to filter watched thanosRuler + thanosRulerInstanceSelector: "" + + ## Set a Field Selector to filter watched secrets + ## + secretFieldSelector: "type!=kubernetes.io/dockercfg,type!=kubernetes.io/service-account-token,type!=helm.sh/release.v1" + + ## If false then the user will opt out of automounting API credentials. 
+ ## + automountServiceAccountToken: true + + ## Additional volumes + ## + extraVolumes: [] + + ## Additional volume mounts + ## + extraVolumeMounts: [] + +## Deploy a Prometheus instance +## +prometheus: + enabled: true + + ## Toggle prometheus into agent mode + ## Note that many of the features described below (e.g. rules, query, alerting, remote read, thanos) will not work in agent mode. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/designs/prometheus-agent.md + ## + agentMode: false + + ## Annotations for Prometheus + ## + annotations: {} + + ## Configure network policy for the prometheus + networkPolicy: + enabled: false + + ## Flavor of the network policy to use. + # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + # cilium: + # endpointSelector: + # egress: + # ingress: + + # egress: + # - {} + # ingress: + # - {} + # podSelector: + # matchLabels: + # app: prometheus + + ## Service account for Prometheuses to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + automountServiceAccountToken: true + + # Service for thanos service discovery on sidecar + # Enabling this allows Thanos Query to use + # `--store=dnssrv+_grpc._tcp.${kube-prometheus-stack.fullname}-thanos-discovery.${namespace}.svc.cluster.local` to discover + # the Thanos sidecars on prometheus nodes + # (Please remember to change ${kube-prometheus-stack.fullname} and ${namespace}. Do not just copy and paste!) 
+ thanosService: + enabled: false + annotations: {} + labels: {} + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## gRPC port config + portName: grpc + port: 10901 + targetPort: "grpc" + + ## HTTP port config (for metrics) + httpPortName: http + httpPort: 10902 + targetHttpPort: "http" + + ## ClusterIP to assign + # Default is to make this a headless service ("None") + clusterIP: "None" + + ## Port to expose on each node, if service type is NodePort + ## + nodePort: 30901 + httpNodePort: 30902 + + # ServiceMonitor to scrape Sidecar metrics + # Needs thanosService to be enabled as well + thanosServiceMonitor: + enabled: false + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/main/Documentation/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## Metric relabel configs to apply to samples before ingestion. + metricRelabelings: [] + + ## relabel configs to apply to samples before ingestion. + relabelings: [] + + # Service for external access to sidecar + # Enabling this creates a service to expose thanos-sidecar outside the cluster. 
+ thanosServiceExternal: + enabled: false + annotations: {} + labels: {} + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## gRPC port config + portName: grpc + port: 10901 + targetPort: "grpc" + + ## HTTP port config (for metrics) + httpPortName: http + httpPort: 10902 + targetHttpPort: "http" + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: LoadBalancer + + ## Port to expose on each node + ## + nodePort: 30901 + httpNodePort: 30902 + + ## Configuration for Prometheus service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + + ## Port for Prometheus Service to listen on + ## + port: 9090 + + ## To be used with a proxy extraContainer port + targetPort: 8081 + + ## Port for Prometheus Reloader to listen on + ## + reloaderWebPort: 8080 + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips + ## + externalIPs: [] + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30090 + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Additional ports to open for Prometheus service + ## + additionalPorts: [] + # additionalPorts: + # - name: oauth-proxy + # port: 8081 + # targetPort: 8081 + # - name: oauth-metrics + # port: 8082 + # targetPort: 8082 + + ## Treat all endpoints as "ready" even if the Pods themselves are not + ## Ref: https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/#ServiceSpec + publishNotReadyAddresses: false + + ## If you want to make sure that connections from a 
particular client are passed to the same Pod each time + ## Accepts 'ClientIP' or 'None' + ## + sessionAffinity: None + + ## If you want to modify the ClientIP sessionAffinity timeout + ## The value must be >0 && <=86400 (1 day) if sessionAffinity == "ClientIP" + ## + sessionAffinityConfig: + clientIP: + timeoutSeconds: 10800 + + ## Configuration for creating a separate Service for each statefulset Prometheus replica + ## + servicePerReplica: + enabled: false + annotations: {} + + ## Port for Prometheus Service per replica to listen on + ## + port: 9090 + + ## To be used with a proxy extraContainer port + targetPort: 9090 + + ## Port to expose on each node + ## Only used if servicePerReplica.type is 'NodePort' + ## + nodePort: 30091 + + ## Loadbalancer source IP ranges + ## Only used if servicePerReplica.type is "LoadBalancer" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Configure pod disruption budgets for Prometheus + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget + ## This configuration is immutable once created and will require the PDB to be deleted to be changed + ## https://github.com/kubernetes/kubernetes/issues/45398 + ## + podDisruptionBudget: + enabled: false + minAvailable: 1 + maxUnavailable: "" + + # Ingress exposes thanos sidecar outside the cluster + thanosIngress: + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + annotations: {} + labels: {} + servicePort: 10901 + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30901 + + ## Hosts must be 
provided if Ingress is enabled. + ## + hosts: [] + # - thanos-gateway.domain.com + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Thanos Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: thanos-gateway-tls + # hosts: + # - thanos-gateway.domain.com + # + + ## ExtraSecret can be used to store various data in an extra secret + ## (use it for example to store hashed basic auth credentials) + extraSecret: + ## if not set, name will be auto generated + # name: "" + annotations: {} + data: {} + # auth: | + # foo:$apr1$OFG3Xybp$ckL0FHDAkoXYIlH9.cysT0 + # someoneelse:$apr1$DMZX2Z4q$6SbQIfyuLQd.xmo/P0m2c. + + ingress: + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + annotations: {} + labels: {} + + ## Redirect ingress to an additional defined port on the service + # servicePort: 8081 + + ## Hostnames. + ## Must be provided if Ingress is enabled. 
+ ## + # hosts: + # - prometheus.domain.com + hosts: [] + + ## Paths to use for ingress rules - one path should match the prometheusSpec.routePrefix + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Prometheus Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: prometheus-general-tls + # hosts: + # - prometheus.example.com + + ## Configuration for creating an Ingress that will map to each Prometheus replica service + ## prometheus.servicePerReplica must be enabled + ## + ingressPerReplica: + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + annotations: {} + labels: {} + + ## Final form of the hostname for each per replica ingress is + ## {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }} + ## + ## Prefix for the per replica ingress that will have `-$replicaNumber` + ## appended to the end + hostPrefix: "" + ## Domain that will be used for the per replica ingress + hostDomain: "" + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## Secret name containing the TLS certificate for Prometheus per replica ingress + ## Secret must be manually created in the namespace + tlsSecretName: 
"" + + ## Separated secret for each per replica Ingress. Can be used together with cert-manager + ## + tlsSecretPerReplica: + enabled: false + ## Final form of the secret for each per replica ingress is + ## {{ tlsSecretPerReplica.prefix }}-{{ $replicaNumber }} + ## + prefix: "prometheus" + + ## Configure additional options for default pod security policy for Prometheus + ## ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ + podSecurityPolicy: + allowedCapabilities: [] + allowedHostPaths: [] + volumes: [] + + serviceMonitor: + ## If true, create a serviceMonitor for prometheus + ## + selfMonitor: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## Metric relabel configs to apply to samples before ingestion. 
+ ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + # relabel configs to apply to samples before ingestion. + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional Endpoints + ## + additionalEndpoints: [] + # - port: oauth-metrics + # path: /metrics + + ## Settings affecting prometheusSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#prometheusspec + ## + prometheusSpec: + ## If true, pass --storage.tsdb.max-block-duration=2h to prometheus. This is already done if using Thanos + ## + disableCompaction: false + ## APIServerConfig + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#apiserverconfig + ## + apiserverConfig: {} + + ## Allows setting additional arguments for the Prometheus container + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.Prometheus + additionalArgs: [] + + ## Interval between consecutive scrapes. + ## Defaults to 30s. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/release-0.44/pkg/prometheus/promcfg.go#L180-L183 + ## + scrapeInterval: "30s" + + ## Number of seconds to wait for target to respond before erroring + ## + # scrapeTimeout: "30s" + + ## Interval between consecutive evaluations. + ## + evaluationInterval: "30s" + + ## ListenLocal makes the Prometheus server listen on loopback, so that it does not bind against the Pod IP. + ## + listenLocal: false + + ## EnableAdminAPI enables Prometheus the administrative HTTP API which includes functionality such as deleting time series. + ## This is disabled by default. 
+ ## ref: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis + ## + enableAdminAPI: false + + ## Sets version of Prometheus overriding the Prometheus version as derived + ## from the image tag. Useful in cases where the tag does not follow semver v2. + version: "" + + ## WebTLSConfig defines the TLS parameters for HTTPS + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#webtlsconfig + web: {} + + ## Exemplar-related settings that are runtime reloadable. + ## The exemplar storage feature must be enabled for these settings to take effect. + exemplars: "" + ## Maximum number of exemplars stored in memory for all series. + ## If not set, Prometheus uses its default value. + ## A value of zero or less than zero disables the storage. + # maxSize: 100000 + + # EnableFeatures enables access to Prometheus features that are disabled by default. + # ref: https://prometheus.io/docs/prometheus/latest/disabled_features/ + enableFeatures: [] + # - exemplar-storage + + ## Image of Prometheus. + ## + image: + repository: rancher/mirrored-prometheus-prometheus + tag: v2.50.1 + sha: "" + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## If specified, the pod's topology spread constraints.
+ ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ + ## + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app: prometheus + + ## Alertmanagers to which alerts will be sent + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#alertmanagerendpoints + ## + ## Default configuration will connect to the alertmanager deployed as part of this release + ## + alertingEndpoints: [] + # - name: "" + # namespace: "" + # port: http + # scheme: http + # pathPrefix: "" + # tlsConfig: {} + # bearerTokenFile: "" + # apiVersion: v2 + + ## External labels to add to any time series or alerts when communicating with external systems + ## + externalLabels: {} + + ## enable --web.enable-remote-write-receiver flag on prometheus-server + ## + enableRemoteWriteReceiver: false + + ## Name of the external label used to denote replica name + ## + replicaExternalLabelName: "" + + ## If true, the Operator won't add the external label used to denote replica name + ## + replicaExternalLabelNameClear: false + + ## Name of the external label used to denote Prometheus instance name + ## + prometheusExternalLabelName: "" + + ## If true, the Operator won't add the external label used to denote Prometheus instance name + ## + prometheusExternalLabelNameClear: false + + ## External URL at which Prometheus will be reachable. + ## + externalUrl: "" + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Secrets is a list of Secrets in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. + ## The Secrets are mounted into /etc/prometheus/secrets/. Secrets changes after initial creation of a Prometheus object are not + ## reflected in the running Pods. 
To change the secrets mounted into the Prometheus Pods, the object must be deleted and recreated + ## with the new list of secrets. + ## + secrets: [] + + ## ConfigMaps is a list of ConfigMaps in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. + ## The ConfigMaps are mounted into /etc/prometheus/configmaps/. + ## + configMaps: [] + + ## QuerySpec defines the query command line flags when starting Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#queryspec + ## + query: {} + + ## If nil, select own namespace. Namespaces to be selected for PrometheusRules discovery. + ruleNamespaceSelector: {} + ## Example which selects PrometheusRules in namespaces with label "prometheus" set to "somelabel" + # ruleNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.ruleSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the PrometheusRule resources created + ## + ruleSelectorNilUsesHelmValues: false + + ## PrometheusRules to be selected for target discovery. 
+ ## If {}, select all PrometheusRules + ## + ruleSelector: {} + ## Example which select all PrometheusRules resources + ## with label "prometheus" with values any of "example-rules" or "example-rules-2" + # ruleSelector: + # matchExpressions: + # - key: prometheus + # operator: In + # values: + # - example-rules + # - example-rules-2 + # + ## Example which select all PrometheusRules resources with label "role" set to "example-rules" + # ruleSelector: + # matchLabels: + # role: example-rules + + ## If true, a nil or {} value for prometheus.prometheusSpec.serviceMonitorSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the servicemonitors created + ## + serviceMonitorSelectorNilUsesHelmValues: false + + ## ServiceMonitors to be selected for target discovery. + ## If {}, select all ServiceMonitors + ## + serviceMonitorSelector: {} + ## Example which selects ServiceMonitors with label "prometheus" set to "somelabel" + # serviceMonitorSelector: + # matchLabels: + # prometheus: somelabel + + ## Namespaces to be selected for ServiceMonitor discovery. + ## + serviceMonitorNamespaceSelector: {} + ## Example which selects ServiceMonitors in namespaces with label "prometheus" set to "somelabel" + # serviceMonitorNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.podMonitorSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the podmonitors created + ## + podMonitorSelectorNilUsesHelmValues: false + + ## PodMonitors to be selected for target discovery. + ## If {}, select all PodMonitors + ## + podMonitorSelector: {} + ## Example which selects PodMonitors with label "prometheus" set to "somelabel" + # podMonitorSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. 
Namespaces to be selected for PodMonitor discovery. + podMonitorNamespaceSelector: {} + ## Example which selects PodMonitor in namespaces with label "prometheus" set to "somelabel" + # podMonitorNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.probeSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the probes created + ## + probeSelectorNilUsesHelmValues: true + + ## Probes to be selected for target discovery. + ## If {}, select all Probes + ## + probeSelector: {} + ## Example which selects Probes with label "prometheus" set to "somelabel" + # probeSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. Namespaces to be selected for Probe discovery. + probeNamespaceSelector: {} + ## Example which selects Probe in namespaces with label "prometheus" set to "somelabel" + # probeNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.scrapeConfigSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the scrapeConfigs created + ## + scrapeConfigSelectorNilUsesHelmValues: true + + ## scrapeConfigs to be selected for target discovery. + ## If {}, select all scrapeConfigs + ## + scrapeConfigSelector: {} + ## Example which selects scrapeConfigs with label "prometheus" set to "somelabel" + # scrapeConfigSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. Namespaces to be selected for scrapeConfig discovery. 
+ scrapeConfigNamespaceSelector: {} + ## Example which selects scrapeConfig in namespaces with label "prometheus" set to "somelabel" + # scrapeConfigNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## How long to retain metrics + ## + retention: 10d + + ## Maximum size of metrics + ## + retentionSize: "" + + ## Allow out-of-order/out-of-bounds samples ingested into Prometheus for a specified duration + ## See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb + tsdb: + outOfOrderTimeWindow: 0s + + ## Enable compression of the write-ahead log using Snappy. + ## + walCompression: true + + ## If true, the Operator won't process any Prometheus configuration changes + ## + paused: false + + ## Number of replicas of each shard to deploy for a Prometheus deployment. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## + replicas: 1 + + ## EXPERIMENTAL: Number of shards to distribute targets onto. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## Note that scaling down shards will not reshard data onto remaining instances; it must be moved manually. + ## Increasing shards will not reshard data either, but it will continue to be available from the same instances. + ## To query globally, use Thanos sidecar and Thanos querier or remote write data to a central location. + ## Sharding is done on the content of the `__address__` target meta-label. + ## + shards: 1 + + ## Log level that Prometheus is configured with + ## + logLevel: info + + ## Log format that Prometheus is configured with + ## + logFormat: logfmt + + ## Prefix used to register routes, overriding externalUrl route. + ## Useful for proxies that rewrite URLs. + ## + routePrefix: / + + ## Standard object's metadata.
More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Metadata Labels and Annotations get propagated to the prometheus pods. + ## + podMetadata: {} + # labels: + # app: prometheus + # k8s-app: prometheus + + ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. + ## The value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. + ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. + ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured. + podAntiAffinity: "" + + ## If anti-affinity is enabled, sets the topologyKey to use for anti-affinity. + ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone + ## + podAntiAffinityTopologyKey: kubernetes.io/hostname + + ## Assign custom affinity rules to the prometheus instance + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + + ## The remote_read spec configuration for Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#remotereadspec + remoteRead: [] + # - url: http://remote1/read + ## additionalRemoteRead is appended to remoteRead + additionalRemoteRead: [] + + ## The remote_write spec configuration for Prometheus.
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#remotewritespec + remoteWrite: [] + # - url: http://remote1/push + ## additionalRemoteWrite is appended to remoteWrite + additionalRemoteWrite: [] + + ## Enable/Disable Grafana dashboards provisioning for prometheus remote write feature + remoteWriteDashboards: false + + ## Resource limits & requests + ## + resources: + limits: + memory: 3000Mi + cpu: 1000m + requests: + memory: 750Mi + cpu: 750m + + ## Prometheus StorageSpec for persistent data + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/storage.md + ## + storageSpec: {} + ## Using PersistentVolumeClaim + ## + # volumeClaimTemplate: + # spec: + # storageClassName: gluster + # accessModes: ["ReadWriteOnce"] + # resources: + # requests: + # storage: 50Gi + # selector: {} + + ## Using tmpfs volume + ## + # emptyDir: + # medium: Memory + + # Additional volumes on the output StatefulSet definition. + volumes: + - name: nginx-home + emptyDir: {} + - name: prometheus-nginx + configMap: + name: prometheus-nginx-proxy-config + defaultMode: 438 + + # Additional VolumeMounts on the output StatefulSet definition. + volumeMounts: [] + + ## AdditionalScrapeConfigs allows specifying additional Prometheus scrape configurations. Scrape configurations + ## are appended to the configurations generated by the Prometheus Operator. Job configurations must have the form + ## as specified in the official Prometheus documentation: + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config. As scrape configs are + ## appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility + ## to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible + ## scrape configs are going to break Prometheus after the upgrade. 
+ ## AdditionalScrapeConfigs can be defined as a list or as a templated string. + ## + ## The scrape configuration example below will find master nodes, provided they have the name .*mst.*, relabel the + ## port to 2379 and allow etcd scraping provided it is running on all Kubernetes master nodes + ## + additionalScrapeConfigs: [] + # - job_name: kube-etcd + # kubernetes_sd_configs: + # - role: node + # scheme: https + # tls_config: + # ca_file: /etc/prometheus/secrets/etcd-client-cert/etcd-ca + # cert_file: /etc/prometheus/secrets/etcd-client-cert/etcd-client + # key_file: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key + # relabel_configs: + # - action: labelmap + # regex: __meta_kubernetes_node_label_(.+) + # - source_labels: [__address__] + # action: replace + # target_label: __address__ + # regex: ([^:;]+):(\d+) + # replacement: ${1}:2379 + # - source_labels: [__meta_kubernetes_node_name] + # action: keep + # regex: .*mst.* + # - source_labels: [__meta_kubernetes_node_name] + # action: replace + # target_label: node + # regex: (.*) + # replacement: ${1} + # metric_relabel_configs: + # - regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone) + # action: labeldrop + # + ## If scrape config contains a repetitive section, you may want to use a template. + ## In the following example, you can see how to define `gce_sd_configs` for multiple zones + # additionalScrapeConfigs: | + # - job_name: "node-exporter" + # gce_sd_configs: + # {{range $zone := .Values.gcp_zones}} + # - project: "project1" + # zone: "{{$zone}}" + # port: 9100 + # {{end}} + # relabel_configs: + # ... + + + ## If additional scrape configurations are already deployed in a single secret file you can use this section.
+ ## Expected values are the secret name and key + ## Cannot be used with additionalScrapeConfigs + additionalScrapeConfigsSecret: {} + # enabled: false + # name: + # key: + + ## additionalPrometheusSecretsAnnotations allows to add annotations to the kubernetes secret. This can be useful + ## when deploying via spinnaker to disable versioning on the secret, strategy.spinnaker.io/versioned: 'false' + additionalPrometheusSecretsAnnotations: {} + + ## AdditionalAlertManagerConfigs allows for manual configuration of alertmanager jobs in the form as specified + ## in the official Prometheus documentation https://prometheus.io/docs/prometheus/latest/configuration/configuration/#. + ## AlertManager configurations specified are appended to the configurations generated by the Prometheus Operator. + ## As AlertManager configs are appended, the user is responsible to make sure it is valid. Note that using this + ## feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release + ## notes to ensure that no incompatible AlertManager configs are going to break Prometheus after the upgrade. + ## + additionalAlertManagerConfigs: [] + # - consul_sd_configs: + # - server: consul.dev.test:8500 + # scheme: http + # datacenter: dev + # tag_separator: ',' + # services: + # - metrics-prometheus-alertmanager + + ## If additional alertmanager configurations are already deployed in a single secret, or you want to manage + ## them separately from the helm deployment, you can use this section. + ## Expected values are the secret name and key + ## Cannot be used with additionalAlertManagerConfigs + additionalAlertManagerConfigsSecret: {} + # name: + # key: + # optional: false + + ## AdditionalAlertRelabelConfigs allows specifying Prometheus alert relabel configurations. Alert relabel configurations specified are appended + ## to the configurations generated by the Prometheus Operator. 
Alert relabel configurations specified must have the form as specified in the + ## official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs. + ## As alert relabel configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the + ## possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible alert relabel + ## configs are going to break Prometheus after the upgrade. + ## + additionalAlertRelabelConfigs: [] + # - separator: ; + # regex: prometheus_replica + # replacement: $1 + # action: labeldrop + + ## If additional alert relabel configurations are already deployed in a single secret, or you want to manage + ## them separately from the helm deployment, you can use this section. + ## Expected values are the secret name and key + ## Cannot be used with additionalAlertRelabelConfigs + additionalAlertRelabelConfigsSecret: {} + # name: + # key: + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 1000 and gid 2000. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + ## Priority class assigned to the Pods + ## + priorityClassName: "" + + ## Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment. + ## This section is experimental, it may change significantly without deprecation notice in any release. + ## This is experimental and may change significantly without backward compatibility in any release. 
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#thanosspec + ## + thanos: {} + # secretProviderClass: + # provider: gcp + # parameters: + # secrets: | + # - resourceName: "projects/$PROJECT_ID/secrets/testsecret/versions/latest" + # fileName: "objstore.yaml" + ## ObjectStorageConfig configures object storage in Thanos. + # objectStorageConfig: + # # use existing secret, if configured, objectStorageConfig.secret will not be used + # existingSecret: {} + # # name: "" + # # key: "" + # # will render objectStorageConfig secret data and configure it to be used by Thanos custom resource, + # # ignored when prometheusspec.thanos.objectStorageConfig.existingSecret is set + # # https://thanos.io/tip/thanos/storage.md/#s3 + # secret: {} + # # type: S3 + # # config: + # # bucket: "" + # # endpoint: "" + # # region: "" + # # access_key: "" + # # secret_key: "" + + proxy: + image: + repository: rancher/mirrored-library-nginx + tag: 1.24.0-alpine + + ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod. + ## if using proxy extraContainer update targetPort with proxy container port + containers: | + - name: prometheus-proxy + args: + - nginx + - -g + - daemon off; + - -c + - /nginx/nginx.conf + image: "{{ template "system_default_registry" . }}{{ .Values.prometheus.prometheusSpec.proxy.image.repository }}:{{ .Values.prometheus.prometheusSpec.proxy.image.tag }}" + ports: + - containerPort: 8081 + name: nginx-http + protocol: TCP + volumeMounts: + - mountPath: /nginx + name: prometheus-nginx + - mountPath: /var/cache/nginx + name: nginx-home + securityContext: + runAsUser: 101 + runAsGroup: 101 + + ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes + ## (permissions, dir tree) on mounted volumes before starting prometheus + initContainers: [] + + ## PortName to use for Prometheus. 
+ ## + portName: "http-web" + + ## ArbitraryFSAccessThroughSMs configures whether configuration based on a service monitor can access arbitrary files + ## on the file system of the Prometheus container e.g. bearer token files. + arbitraryFSAccessThroughSMs: false + + ## OverrideHonorLabels if set to true overrides all user configured honor_labels. If HonorLabels is set in ServiceMonitor + ## or PodMonitor to true, this overrides honor_labels to false. + overrideHonorLabels: false + + ## OverrideHonorTimestamps allows to globally enforce honoring timestamps in all scrape configs. + overrideHonorTimestamps: false + + ## When ignoreNamespaceSelectors is set to true, namespaceSelector from all PodMonitor, ServiceMonitor and Probe objects will be ignored, + ## they will only discover targets within the namespace of the PodMonitor, ServiceMonitor and Probe object, + ## and servicemonitors will be installed in the default service namespace. + ## Defaults to false when unset; this values file sets it to true. + ignoreNamespaceSelectors: true + + ## EnforcedNamespaceLabel enforces adding a namespace label of origin for each alert and metric that is user created. + ## The label value will always be the namespace of the object that is being created. + ## Disabled by default + enforcedNamespaceLabel: "" + + ## PrometheusRulesExcludedFromEnforce - list of prometheus rules to be excluded from enforcing of adding namespace labels. + ## Works only if enforcedNamespaceLabel is set (non-empty). Make sure both ruleNamespace and ruleName are set for each pair + ## Deprecated, use `excludedFromEnforcement` instead + prometheusRulesExcludedFromEnforce: [] + + ## ExcludedFromEnforcement - list of object references to PodMonitor, ServiceMonitor, Probe and PrometheusRule objects + ## to be excluded from enforcing a namespace label of origin. + ## Works only if enforcedNamespaceLabel is set (non-empty).
+ ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#objectreference + excludedFromEnforcement: [] + + ## QueryLogFile specifies the file to which PromQL queries are logged. Note that this location must be writable, + ## and can be persisted using an attached volume. Alternatively, the location can be set to a stdout location such + ## as /dev/stdout to log query information to the default Prometheus log stream. This is only available in versions + ## of Prometheus >= 2.16.0. For more details, see the Prometheus docs (https://prometheus.io/docs/guides/query-log/) + queryLogFile: false + + # Use to set global sample_limit for Prometheus. This acts as the default SampleLimit for ServiceMonitor or/and PodMonitor. + # Set to 'false' to disable global sample_limit, or set to a number to override the default value. + sampleLimit: false + + # EnforcedKeepDroppedTargetsLimit defines a limit on the number of targets dropped by relabeling that will be kept in memory. + # The value overrides any spec.keepDroppedTargets set by ServiceMonitor, PodMonitor, Probe objects unless spec.keepDroppedTargets + # is greater than zero and less than spec.enforcedKeepDroppedTargets. 0 means no limit. + enforcedKeepDroppedTargets: 0 + + ## EnforcedSampleLimit defines a global limit on the number of scraped samples that will be accepted. This overrides any SampleLimit + ## set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the SampleLimit to keep overall + ## number of samples/series under the desired limit. Note that if SampleLimit is lower, that value will be taken instead. + enforcedSampleLimit: false + + ## EnforcedTargetLimit defines a global limit on the number of scraped targets. This overrides any TargetLimit set + ## per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the TargetLimit to keep the overall + ## number of targets under the desired limit.
Note that if TargetLimit is lower, that value will be taken instead, except + ## if either value is zero, in which case the non-zero value will be used. If both values are zero, no limit is enforced. + enforcedTargetLimit: false + + + ## Per-scrape limit on number of labels that will be accepted for a sample. If more than this number of labels are present + ## post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions + ## 2.27.0 and newer. + enforcedLabelLimit: false + + ## Per-scrape limit on length of labels name that will be accepted for a sample. If a label name is longer than this number + ## post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions + ## 2.27.0 and newer. + enforcedLabelNameLengthLimit: false + + ## Per-scrape limit on length of labels value that will be accepted for a sample. If a label value is longer than this + ## number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus + ## versions 2.27.0 and newer. + enforcedLabelValueLengthLimit: false + + ## AllowOverlappingBlocks enables vertical compaction and vertical query merge in Prometheus. This is still experimental + ## in Prometheus so it may change in any upcoming release. + allowOverlappingBlocks: false + + ## Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to + ## be considered available. Defaults to 0 (pod will be considered available as soon as it is ready). + minReadySeconds: 0 + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + # Use the host's network namespace if true. Make sure to understand the security implications if you want to enable it. 
+ # When hostNetwork is enabled, this will set dnsPolicy to ClusterFirstWithHostNet automatically. + hostNetwork: false + + # HostAlias holds the mapping between IP and hostnames that will be injected + # as an entry in the pod’s hosts file. + hostAliases: [] + # - ip: 10.10.0.100 + # hostnames: + # - a1.app.local + # - b1.app.local + + ## TracingConfig configures tracing in Prometheus. + ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#prometheustracingconfig + tracingConfig: {} + + ## Additional configuration which is not covered by the properties above. (passed through tpl) + additionalConfig: {} + + ## Additional configuration which is not covered by the properties above. + ## Useful, if you need advanced templating inside alertmanagerSpec. + ## Otherwise, use prometheus.prometheusSpec.additionalConfig (passed through tpl) + additionalConfigString: "" + + ## Defines the maximum time that the `prometheus` container's startup probe + ## will wait before being considered failed. The startup probe will return + ## success after the WAL replay is complete. If set, the value should be + ## greater than 60 (seconds). Otherwise it will be equal to 600 seconds (15 + ## minutes). + maximumStartupDurationSeconds: 0 + + additionalRulesForClusterRole: [] + # - apiGroups: [ "" ] + # resources: + # - nodes/proxy + # verbs: [ "get", "list", "watch" ] + + additionalServiceMonitors: [] + ## Name of the ServiceMonitor to create + ## + # - name: "" + + ## Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from + ## the chart + ## + # additionalLabels: {} + + ## Service label for use in assembling a job name of the form