# config.yaml
---
# Exporters used by default. `cloudevent` matches the entry of the same name
# in the `exporters` section of this file.
# NOTE(review): presumably applies to SLOs that list no exporters of their
# own — confirm against the consuming tool.
default_exporters:
  - cloudevent
# Backends, keyed by name. Each name maps to that backend's own settings.
# Settings must be nested under their backend: written flat, keys such as
# `project_id` and `url` repeat at one level and most parsers silently keep
# only the last occurrence.
#
# Values of the form ${NAME} are placeholders.
# NOTE(review): presumably expanded from environment variables by the
# consumer before or while loading — confirm. They are left unquoted so the
# expanded value keeps whatever type the loader infers (e.g. a numeric port).
backends:
  cloud_monitoring:
    project_id: ${STACKDRIVER_HOST_PROJECT_ID}
  cloud_monitoring_mql:
    project_id: ${STACKDRIVER_HOST_PROJECT_ID}
  cloud_service_monitoring:
    project_id: ${STACKDRIVER_HOST_PROJECT_ID}
  # Dotted name with an explicitly empty settings mapping.
  # NOTE(review): looks like an import path to a custom backend class —
  # confirm how the consumer resolves it.
  samples.custom.custom_backend.CustomBackend: {}
  datadog:
    api_host: ${DATADOG_API_HOST}
    api_key: ${DATADOG_API_KEY}
    app_key: ${DATADOG_APP_KEY}
  dynatrace:
    api_url: ${DYNATRACE_API_URL}
    api_token: ${DYNATRACE_API_TOKEN}
  elasticsearch:
    url: ${ELASTICSEARCH_URL}
  prometheus:
    url: ${PROMETHEUS_URL}
  splunk:
    host: ${SPLUNK_HOST}
    port: ${SPLUNK_PORT}
    user: ${SPLUNK_USER}
    password: ${SPLUNK_PWD}
  open_search:
    url: ${OPENSEARCH_URL}
# Exporters, keyed by name. Each name maps to that exporter's own settings.
# Settings must be nested under their exporter: written flat, the exporter
# names and their keys (`project_id`, `api_url`, `api_token`, `url`, ...)
# collide with each other at a single level.
#
# Names containing a slash (`cloud_monitoring/test`, `dynatrace/test`) are
# separate entries alongside the plain name.
# NOTE(review): presumably "<exporter type>/<variant>" — confirm against the
# consuming tool.
exporters:
  cloudevent:
    service_url: "http://localhost:8081"
  cloud_monitoring:
    project_id: ${STACKDRIVER_HOST_PROJECT_ID}
  cloud_monitoring/test:
    project_id: ${PUBSUB_PROJECT_ID}
  # Dotted names with explicitly empty settings mappings.
  # NOTE(review): look like import paths to custom exporter classes — confirm.
  samples.custom.custom_exporter.CustomMetricExporter: {}
  samples.custom.custom_exporter.CustomSLOExporter: {}
  datadog:
    api_key: ${DATADOG_API_KEY}
    app_key: ${DATADOG_APP_KEY}
  dynatrace:
    api_url: ${DYNATRACE_API_URL}
    api_token: ${DYNATRACE_API_TOKEN}
    metric_timeseries_id: custom:slo.error_budget_burn_rate
  dynatrace/test:
    api_url: ${DYNATRACE_API_URL}
    api_token: ${DYNATRACE_API_TOKEN}
  prometheus:
    url: ${PROMETHEUS_PUSHGATEWAY_URL}
  pubsub:
    project_id: ${PUBSUB_PROJECT_ID}
    topic_name: ${PUBSUB_TOPIC_NAME}
  # Empty mapping, written `{}` to match the other empty entries above.
  prometheus_self: {}
# Error budget policies, keyed by policy name. Each policy holds an ordered
# list of `steps`. Every step carries a display name, a burn-rate threshold,
# an `alert` flag, the messages for the alerting and OK states, and a
# `window`.
#
# The steps must be nested under their policy's `steps` key: written flat,
# `steps` is duplicated and the list items are detached from any policy.
#
# Each `window` equals its step's `name` expressed in seconds
# (3600 = 1 hour, 43200 = 12 hours, 604800 = 7 days, 2419200 = 28 days,
# 86400 = 24 hours, 172800 = 48 hours).
error_budget_policies:
  default:
    steps:
      - name: 1 hour
        burn_rate_threshold: 9
        alert: true
        message_alert: Page to defend the SLO
        message_ok: Last hour on track
        window: 3600
      - name: 12 hours
        burn_rate_threshold: 3
        alert: true
        message_alert: Page to defend the SLO
        message_ok: Last 12 hours on track
        window: 43200
      - name: 7 days
        burn_rate_threshold: 1.5
        alert: false
        message_alert: Dev team dedicates 25% of engineers to the reliability backlog
        message_ok: Last week on track
        window: 604800
      - name: 28 days
        burn_rate_threshold: 1
        alert: false
        message_alert: Freeze release, unless related to reliability or security
        message_ok: Unfreeze release, per the agreed roll-out policy
        window: 2419200
  cloud_service_monitoring:
    steps:
      - name: 24 hours
        burn_rate_threshold: 4
        alert: true
        message_alert: Page to defend the SLO
        message_ok: Last 24 hours on track
        window: 86400
      - name: 48 hours
        burn_rate_threshold: 2
        alert: true
        message_alert: Page to defend the SLO
        message_ok: Last 48 hours on track
        window: 172800