Skip to content

Commit

Permalink
Added templating to slack alerts (#363)
Browse files Browse the repository at this point in the history
* Added templating to slack alerts

* Corrected Messages
  • Loading branch information
sfawcett123 authored Nov 10, 2020
1 parent 64588d3 commit 0be7d5b
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 35 deletions.
17 changes: 0 additions & 17 deletions monitoring/alertmanager/alertmanager.yml.tmpl

This file was deleted.

5 changes: 0 additions & 5 deletions monitoring/alertmanager/manifest.yml

This file was deleted.

11 changes: 4 additions & 7 deletions monitoring/prometheus/alert.rules
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ groups:
severity: medium
annotations:
summary: Alert when memory utilization is over 70%.
- alert: StevesHighCpuTest
expr: 'max(cpu{app="get-into-teaching-app-dev"}) > 0'
labels:
severity: medium
annotations:
summary: Alert when max CPU utilization is over 70%.
- name: TTA
rules:
- alert: TooManyRequests
Expand Down Expand Up @@ -64,9 +58,12 @@ groups:
sum(increase(http_requests_received_total{controller=~".+",action=~".+",code=~"429"}[1m]))
> 0
labels:
severity: high
severity: medium
annotations:
summary: Alert when any client hits a rate limit.
runbook: https://dfedigital.atlassian.net/wiki/spaces/GGIT/pages/2152497153/Rate+Limit
dashboard: https://grafana-prod-get-into-teaching.london.cloudapps.digital/d/28EURzZGz/get-into-teaching-api?viewPanel=60&orgId=1&var-App=get-into-teaching-api-prod
description: The API has received too many requests, please read the runbook for advice on what action should be taken.
- alert: FailedJobs
expr: 'sum(increase(api_hangfire_jobs{state="failed"}[1m])) > 0'
labels:
Expand Down
13 changes: 7 additions & 6 deletions terraform/monitoring/modules.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ locals {
}

module "prometheus" {
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/prometheus?ref=monitoring-terraform-0_13-tv"
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/prometheus"
paas_prometheus_exporter_endpoint = module.paas_prometheus_exporter.endpoint
monitoring_space_id = data.cloudfoundry_space.space.id
monitoring_instance_name = local.monitoring_org_name
Expand All @@ -34,13 +34,13 @@ module "prometheus" {
}

module "influx" {
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/influxdb?ref=monitoring-terraform-0_13-tv"
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/influxdb"
monitoring_space_id = data.cloudfoundry_space.space.id
monitoring_instance_name = local.monitoring_org_name
}

module "paas_prometheus_exporter" {
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/paas_prometheus_exporter?ref=monitoring-terraform-0_13-tv"
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/paas_prometheus_exporter"
monitoring_space_id = data.cloudfoundry_space.space.id
monitoring_instance_name = local.monitoring_org_name
paas_username = var.paas_exporter_username
Expand All @@ -49,7 +49,7 @@ module "paas_prometheus_exporter" {


module "grafana" {
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/grafana?ref=monitoring-terraform-0_13-tv"
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/grafana"
monitoring_space_id = data.cloudfoundry_space.space.id
monitoring_instance_name = "${var.environment}-${var.grafana["name"]}"
prometheus_endpoint = module.prometheus.endpoint
Expand All @@ -64,8 +64,9 @@ module "grafana" {
}

module "alertmanager" {
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/alertmanager?ref=monitoring-terraform-0_13-tv"
source = "git::https://github.com/DFE-Digital/bat-platform-building-blocks.git//terraform/modules/alertmanager?ref=monitoring/alertmanager/templates"
monitoring_space_id = data.cloudfoundry_space.space.id
monitoring_instance_name = "${var.environment}-${var.alertmanager["name"]}"
config = templatefile(var.alertmanager["config"], local.template_variable_map)
slack_url = var.alertmanager_slack_url
slack_channel = var.alertmanager_slack_channel
}
23 changes: 23 additions & 0 deletions terraform/monitoring/testAlert.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# Post a synthetic "firing" alert to Alertmanager so the alert routing and
# notification templates can be checked end to end.
#
# Usage: testAlert.sh [name]
#   name   Value appended to the alert's generatorURL (default: STEVE).
#          It is interpolated into the JSON body as-is, so it must not
#          contain double quotes or backslashes.
#
# Environment:
#   ALERTMANAGER_URL   Alerts endpoint to POST to. Defaults to the
#                      development Alertmanager instance.
#
# Exit status is non-zero when the request fails or Alertmanager answers
# with an HTTP error (curl --fail).

set -euo pipefail

name="${1:-STEVE}"
url="${ALERTMANAGER_URL:-https://alertmanager-dev-get-into-teaching.london.cloudapps.digital/api/v1/alerts}"

echo "firing up alert $name"

# The heredoc delimiter is unquoted on purpose so that ${name} is expanded;
# --data-binary sends the JSON exactly as written below.
curl --fail -XPOST "$url" \
     -H 'Content-Type: application/json' \
     --data-binary @- <<EOF
[{
"status": "firing",
"labels": {
"severity":"medium"
},
"annotations": {
"summary": "This is a test of the alerting system on development, this test can be ignored.",
"runbook": "https://dfedigital.atlassian.net/wiki/spaces/GGIT/pages/2152595459/Test+Page",
"dashboard": "https://grafana-dev-get-into-teaching.london.cloudapps.digital/d/qZjcqcpGz/csp-violations?orgId=1",
"description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
},
"generatorURL": "http://prometheus.int.example.net/${name}"
}]
EOF

0 comments on commit 0be7d5b

Please sign in to comment.