diff --git a/config/manifests/bases/grafana-operator.clusterserviceversion.yaml b/config/manifests/bases/grafana-operator.clusterserviceversion.yaml index 8a06be9b9..c2e11ec1f 100644 --- a/config/manifests/bases/grafana-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/grafana-operator.clusterserviceversion.yaml @@ -16,6 +16,12 @@ spec: apiservicedefinitions: {} customresourcedefinitions: owned: + - description: GrafanaNotificationPolicy is the Schema for the GrafanaNotificationPolicy + API + displayName: Grafana Notification policy + kind: GrafanaNotificationPolicy + name: grafananotificationpolicies.grafana.integreatly.org + version: v1beta1 - description: GrafanaAlertRuleGroup is the Schema for the grafanaalertrulegroups API displayName: Grafana Alert Rule Group diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 9567dfd5b..ae30bd4a7 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -6,4 +6,5 @@ resources: - grafana_v1beta1_grafanafolder.yaml - grafana_v1beta1_grafanaalertrulegroup.yaml - grafana_v1beta1_grafanacontactpoint.yaml +- grafana_v1beta1_grafananotificationpolicy.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/docs/_index.html b/docs/_index.html index 0d01f64ae..0dbc109c9 100644 --- a/docs/_index.html +++ b/docs/_index.html @@ -5,7 +5,7 @@ +++ {{< blocks/cover title="Welcome to the Grafana Operator documentation" image_anchor="top" height="full" >}} - + Learn More diff --git a/docs/docs/alerting.md b/docs/docs/alerting.md deleted file mode 100644 index c2ee48fbe..000000000 --- a/docs/docs/alerting.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Alerting -weight: 13 ---- -{{% pageinfo color="primary" %}} -Alerting resources require Grafana version 9.5 or higher. -{{% /pageinfo %}} - -The Grafana Operator currently only supports _Grafana Managed Alerts_. 
- -For data source managed alerts, refer to the documentation and tooling available for the respective data source. -{{% alert title="Note" color="primary" %}} -When using Mimir/Prometheus, you can use the [`mimir.rules.kubernetes`](https://grafana.com/docs/agent/latest/flow/reference/components/mimir.rules.kubernetes/) component of the Grafana Agent to deploy rules as Kubernetes resources. -{{% /alert %}} - - -## Alert rule groups - -Alert Rule Groups contain a list of alerts which should evaluate at the same interval. -Every rule group must belong to a folder and contain at least one rule. - -The easiest way to get the YAML specification for an alert rule is to use the [modify export feature](https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/export-alerting-resources/), introduced in Grafana 10. - -The following snippet shows an example alert rule group with a single alert that fires when the temperature is below zero degrees. - -{{< readfile file="examples/alertrulegroups/resources.yaml" code="true" lang="yaml" >}} diff --git a/docs/docs/alerting/_index.md b/docs/docs/alerting/_index.md new file mode 100644 index 000000000..b1b8504a0 --- /dev/null +++ b/docs/docs/alerting/_index.md @@ -0,0 +1,76 @@ +--- +title: Alerting +weight: 13 +--- +{{% pageinfo color="primary" %}} +Alerting resources require Grafana version 9.5 or higher. +{{% /pageinfo %}} + +The Grafana Operator currently only supports _Grafana Managed Alerts_. + +For data source managed alerts, refer to the documentation and tooling available for the respective data source. +{{% alert title="Note" color="primary" %}} +When using Mimir/Prometheus, you can use the [`mimir.rules.kubernetes`](https://grafana.com/docs/alloy/latest/reference/components/mimir/mimir.rules.kubernetes/) component of [Grafana Alloy](https://grafana.com/docs/alloy/latest/) to deploy rules as Kubernetes resources. 
+{{% /alert %}} + + +## Full example + +The following resources construct the flow outlined in the [Grafana notification documentation](https://grafana.com/docs/grafana/latest/alerting/fundamentals/notifications/). + +They create: +1. Three alert rules across two different groups +2. Two contact points for two different teams +3. A notification policy to route alerts to the correct team + +{{< figure src="notification-routing.png" title="Flowchart of alerts routed through this system" width="500" >}} + +{{% alert title="Note" color="primary" %}} +If you want to try this for yourself, you can [get started with demo data in Grafana Cloud](https://grafana.com/docs/grafana-cloud/get-started/#install-demo-data-sources-and-dashboards). +The examples below utilize the data sources to give you real data to alert on. +{{% /alert %}} + +### Alert rule groups + +The first resources in this flow are _Alert Rule Groups_. +An alert rule group can contain multiple alert rules. +They group together alerts to run on the same interval and are stored in a Grafana folder, alongside dashboards. + +First, create the folder: + +{{< readfile file="../examples/notifications-full/folder.yaml" code="true" lang="yaml" >}} + +The first alert rule group is responsible for alerting on well-known Kubernetes issues: + +{{< readfile file="../examples/notifications-full/kubernetes-alert-rules.yaml" code="true" lang="yaml" >}} + +The second alert rule group is responsible for alerting on security issues: + +{{< readfile file="../examples/notifications-full/security-alert-rules.yaml" code="true" lang="yaml" >}} + +After applying the resources, you can see the created rule groups in the _Alert rules_ overview page: + +![Alert rules overview page](./overview-page.png) + +### Contact Points + +Before you can route alerts to the correct receivers, you need to define how these alerts should be delivered. 
+[Contact points](./contact-points) specify the methods used to notify someone using different providers. + +Since the two different teams get notified using different email addresses, two contact points are required. + +{{< readfile file="../examples/notifications-full/contact-points.yaml" code="true" lang="yaml" >}} + +### Notification Policy + +Now that all parts are in place, the only missing component is the notification policy. +The instance's notification policy routes alerts to contact points based on labels. +A Grafana instance can only have one notification policy applied at a time as it's a global object. + +The following notification policy routes alerts based on the team label and further configures the repetition interval for high severity alerts belonging to the operations team: + +{{< readfile file="../examples/notifications-full/notification-policy.yaml" code="true" lang="yaml" >}} + +After applying the resource, Grafana shows the following notification policy tree: + +![Notification policy tree after applying the resource](./notification-policy-tree.png) diff --git a/docs/docs/alerting/alert-rule-groups.md b/docs/docs/alerting/alert-rule-groups.md new file mode 100644 index 000000000..99ba9bbd9 --- /dev/null +++ b/docs/docs/alerting/alert-rule-groups.md @@ -0,0 +1,12 @@ +--- +title: Alert Rule Groups +--- + +Alert Rule Groups contain a list of alerts which should evaluate at the same interval. +Every rule group must belong to a folder and contain at least one rule. + +The easiest way to get the YAML specification for an alert rule is to use the [modify export feature](https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/export-alerting-resources/), introduced in Grafana 10. + +The following snippet shows an example alert rule group with a single alert that fires when the temperature is below zero degrees. 
+ +{{< readfile file="../examples/alertrulegroups/resources.yaml" code="true" lang="yaml" >}} diff --git a/docs/docs/alerting/contact-points.md b/docs/docs/alerting/contact-points.md new file mode 100644 index 000000000..46b74d64e --- /dev/null +++ b/docs/docs/alerting/contact-points.md @@ -0,0 +1,18 @@ +--- +title: Contact Points +--- + +Contact points contain the configuration for sending alert notifications. You can assign a contact point either in the alert rule or notification policy options. +For a complete explanation on contact points, refer to the [upstream Grafana documentation](https://grafana.com/docs/grafana/latest/alerting/fundamentals/notifications/contact-points/). + +{{% alert title="Note" color="secondary" %}} +The Grafana operator currently only supports a single receiver per contact point definition. +As a workaround you can create multiple contact points with the same `spec.name` value. +Follow issue [#1529](https://github.com/grafana/grafana-operator/issues/1529) for further updates on this topic. +{{% /alert %}} + +The following snippet shows an example contact point which notifies a specific email address. +It also highlights how secrets and config maps can be utilized to externalize some of the configuration. +This is especially useful for contact points which contain sensitive information. + +{{< readfile file="../examples/contactpoint_override/resources.yaml" code="true" lang="yaml" >}} diff --git a/docs/docs/alerting/notification-policies.md b/docs/docs/alerting/notification-policies.md new file mode 100644 index 000000000..2436f63e9 --- /dev/null +++ b/docs/docs/alerting/notification-policies.md @@ -0,0 +1,15 @@ +--- +title: Notification Policies +--- + +Notification policies provide you with a flexible way of designing how to handle notifications and minimize alert noise. 
+For a complete explanation on notification policies, see the [upstream Grafana documentation](https://grafana.com/docs/grafana/latest/alerting/fundamentals/notifications/notification-policies/). + +{{% alert title="Tip" color="secondary" %}} +If you already know which contact point an alert should send to, you can directly set the [`receivers`]({{% relref "/docs/api/#grafanaalertrulegroupspecrulesindexnotificationsettings" %}}) property on the alert rule. +{{% /alert %}} + + +The following snippet shows an example notification policy routing to the `operations` or `security` team based on the `team` label. + +{{< readfile file="../examples/notification-policy/resources.yaml" code="true" lang="yaml" >}} diff --git a/docs/docs/alerting/notification-policy-tree.png b/docs/docs/alerting/notification-policy-tree.png new file mode 100644 index 000000000..4142fba44 Binary files /dev/null and b/docs/docs/alerting/notification-policy-tree.png differ diff --git a/docs/docs/alerting/notification-routing.png b/docs/docs/alerting/notification-routing.png new file mode 100644 index 000000000..460734697 Binary files /dev/null and b/docs/docs/alerting/notification-routing.png differ diff --git a/docs/docs/alerting/overview-page.png b/docs/docs/alerting/overview-page.png new file mode 100644 index 000000000..5132df8c8 Binary files /dev/null and b/docs/docs/alerting/overview-page.png differ diff --git a/examples/notification-policy/resources.yaml b/examples/notification-policy/resources.yaml new file mode 100644 index 000000000..ac646008b --- /dev/null +++ b/examples/notification-policy/resources.yaml @@ -0,0 +1,24 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaNotificationPolicy +metadata: + name: grafananotificationpolicy-sample +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + route: + receiver: grafana-email-default + group_by: + - grafana_folder + - alertname + routes: + - receiver: grafana-email-operations + object_matchers: + - - team + - = + 
- operations + - receiver: grafana-email-security + object_matchers: + - - team + - = + - security diff --git a/examples/notifications-full/contact-points.yaml b/examples/notifications-full/contact-points.yaml new file mode 100644 index 000000000..66ff5e455 --- /dev/null +++ b/examples/notifications-full/contact-points.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaContactPoint +metadata: + name: operations-team +spec: + name: operations-team + type: "email" + instanceSelector: + matchLabels: + instance: my-grafana-stack + settings: + addresses: 'operations@example.com' +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaContactPoint +metadata: + name: security-team +spec: + name: security-team + type: "email" + instanceSelector: + matchLabels: + instance: my-grafana-stack + settings: + addresses: 'security@example.com' diff --git a/examples/notifications-full/folder.yaml b/examples/notifications-full/folder.yaml new file mode 100644 index 000000000..bafea8c5b --- /dev/null +++ b/examples/notifications-full/folder.yaml @@ -0,0 +1,8 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaFolder +metadata: + name: alerts-demo +spec: + instanceSelector: + matchLabels: + instance: "my-grafana-stack" diff --git a/examples/notifications-full/kubernetes-alert-rules.yaml b/examples/notifications-full/kubernetes-alert-rules.yaml new file mode 100644 index 000000000..2904df892 --- /dev/null +++ b/examples/notifications-full/kubernetes-alert-rules.yaml @@ -0,0 +1,167 @@ +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaAlertRuleGroup +metadata: + name: kubernetes-alert-rules +spec: + folderRef: alerts-demo + instanceSelector: + matchLabels: + instance: "my-grafana-stack" + interval: 15m + rules: + - uid: be1q3344udslcf + title: Pod stuck in CrashLoop + condition: C + for: 0s + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: grafanacloud-demoinfra-prom + model: + datasource: + 
type: prometheus + uid: grafanacloud-demoinfra-prom + editorMode: code + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job!=""}[5m]) + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: B + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: threshold + noDataState: OK + execErrState: Error + labels: + team: operations + isPaused: false + - uid: de1q3hd5d5clce + for: 0s + title: Disk Usage - 80% + condition: C + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: grafanacloud-demoinfra-prom + model: + datasource: + type: prometheus + uid: grafanacloud-demoinfra-prom + editorMode: code + expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0.2 + type: lt + 
operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: B + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: threshold + noDataState: NoData + execErrState: Error + labels: + severity: high + team: operations + isPaused: false diff --git a/examples/notifications-full/notification-policy.yaml b/examples/notifications-full/notification-policy.yaml new file mode 100644 index 000000000..acd786155 --- /dev/null +++ b/examples/notifications-full/notification-policy.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaNotificationPolicy +metadata: + name: test +spec: + instanceSelector: + matchLabels: + instance: "my-grafana-stack" + route: + receiver: grafana-default-email + group_by: + - grafana_folder + - alertname + routes: + - receiver: operations-team + object_matchers: + - - team + - = + - operations + routes: + - object_matchers: + - - severity + - = + - high + repeat_interval: 5m + - receiver: security-team + object_matchers: + - - team + - = + - security diff --git a/examples/notifications-full/security-alert-rules.yaml b/examples/notifications-full/security-alert-rules.yaml new file mode 100644 index 000000000..a52d72ff6 --- /dev/null +++ b/examples/notifications-full/security-alert-rules.yaml @@ -0,0 +1,90 @@ +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaAlertRuleGroup +metadata: + name: security-alert-rules +spec: + folderRef: alerts-demo + instanceSelector: + matchLabels: + instance: "my-grafana-stack" + interval: 5m + rules: + - uid: fe1q7kelzb400a + title: Unauthorized log entry + condition: C + for: 0s + data: + - refId: A + queryType: range + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: grafanacloud-demoinfra-logs + model: + datasource: + type: loki + uid: grafanacloud-demoinfra-logs + editorMode: code + expr: count_over_time({namespace="quickpizza",container="copy"}[5m] |= 
"unauthorized") + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + queryType: range + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 0 + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: B + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: threshold + noDataState: OK + execErrState: Error + labels: + team: security + severity: high + isPaused: false diff --git a/hugo/assets/scss/_styles_project.scss b/hugo/assets/scss/_styles_project.scss new file mode 100644 index 000000000..ef11a3e7a --- /dev/null +++ b/hugo/assets/scss/_styles_project.scss @@ -0,0 +1 @@ +@import 'td/code-dark'