Skip to content

Commit 3e6e790

Browse files
committed
dashboard: add section with failover coordinator
This patch introduces a new section displaying failover coordinator metrics. It includes panels for coordinator status (active/passive) and for the instances visible to each coordinator. Needed for #TNTP-197. Closes #247.
1 parent f07819b commit 3e6e790

File tree

8 files changed

+1667
-901
lines changed

8 files changed

+1667
-901
lines changed

config.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,10 @@ metrics_prefix: ''
8686
# - tdg_iproto
8787
# - tdg_rest_api
8888
# - tdg_tasks
89-
# default is [cluster_tarantool3, replication_tarantool3, http, net, slab, mvcc, space, vinyl, cpu, runtime, luajit, operations, crud, expirationd]
89+
# default is [cluster_tarantool3, failover_coordinator, replication_tarantool3, http, net, slab, mvcc, space, vinyl, cpu, runtime, luajit, operations, crud, expirationd]
9090
sections:
9191
- cluster_tarantool3
92+
- failover_coordinator
9293
- replication_tarantool3
9394
- http
9495
- net

dashboard/build/config.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ local variable = import 'dashboard/variable.libsonnet';
2828
metrics_prefix: '',
2929
sections: [
3030
'cluster_tarantool3',
31+
'failover_coordinator',
3132
'replication_tarantool3',
3233
'http',
3334
'net',

dashboard/panels/cluster.libsonnet

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,192 @@ local prometheus = grafana.prometheus;
1111

1212
{
1313
row:: common.row('Cluster overview'),
14+
failover_coordinator_row:: common.row('Failover coordinator'),
15+
16+
// Table panel with one row per series of the `tarantool_coordinator_active`
// metric, showing the value as a colored status text.
//
// Parameters:
//   cfg   - dashboard config; reads `datasource`, `type`, `filters` and
//           `metrics_prefix`.
//   title - panel title.
//
// Only Prometheus datasources are supported; any other `cfg.type` raises an
// error when the target is evaluated.
coordinators_status(
  cfg,
  title='Coordinators status',
)::
  // Instant table query. The 'alias' filter is dropped from cfg.filters before
  // the label selector is built.
  // NOTE(review): presumably because `alias` on this metric carries the
  // coordinator uuid (it is renamed to 'uuid' below) - confirm.
  local status_target =
    if cfg.type == variable.datasource_type.prometheus then
      local selector = common.prometheus_query_filters(
        common.remove_field(cfg.filters, 'alias')
      );
      local gauge = std.format('%starantool_coordinator_active', [cfg.metrics_prefix]);
      prometheus.target(
        expr=(if selector == '' then gauge else std.format('%s{%s}', [gauge, selector])),
        format='table',
        instant=true,
      )
    else
      error 'InfluxDB target is not supported yet';

  // Text/color shown in the 'status' column for each raw metric value.
  local status_mappings = [
    {
      type: 'value',
      options: {
        '0': { color: 'yellow', text: 'passive' },
        '1': { color: 'green', text: 'active' },
      },
    },
    {
      type: 'special',
      options: {
        match: 'null',
        result: { color: 'red', text: 'disconnected' },
      },
    },
  ];

  // Per-column overrides: render 'status' as colored text using the mappings.
  local status_override = {
    matcher: { id: 'byName', options: 'status' },
    properties: [
      { id: 'custom.cellOptions', value: { type: 'color-text' } },
      { id: 'mappings', value: status_mappings },
    ],
  };

  // Hide service labels, put `alias` first and rename the visible columns.
  local organize_columns = {
    id: 'organize',
    options: {
      excludeByName: {
        Time: true,
        __name__: true,
        instance: true,
        job: true,
      },
      indexByName: { Value: 1, alias: 0 },
      renameByName: { Value: 'status', alias: 'uuid' },
    },
  };

  tablePanel.new(
    title=title,
    datasource=cfg.datasource,
    transform='table',
  ).addTarget(status_target) {
    options: { cellHeight: 'sm', showHeader: true },
    fieldConfig: {
      defaults: {
        custom: {
          align: 'auto',
          cellOptions: { type: 'auto' },
          footer: { reducers: [] },
          inspect: false,
        },
        mappings: [],
        thresholds: {
          mode: 'absolute',
          steps: [
            { color: 'green', value: 0 },
            { color: 'red', value: 80 },
          ],
        },
      },
      overrides: [status_override],
    },
    transformations: [organize_columns],
  },
98+
99+
// Table panel with one row per series of the `tarantool_instance_status`
// metric, showing the value as a colored status text.
//
// Parameters:
//   cfg   - dashboard config; reads `datasource`, `type`, `filters` and
//           `metrics_prefix`.
//   title - panel title.
//
// Only Prometheus datasources are supported; any other `cfg.type` raises an
// error when the target is evaluated.
instances_seen_by_coordinators(
  cfg,
  title='Instances seen by coordinators',
)::
  // Instant table query. The 'alias' filter is dropped from cfg.filters before
  // the label selector is built.
  // NOTE(review): presumably because `alias` on this metric identifies the
  // coordinator (it is renamed to 'coordinator uuid' below) - confirm.
  local instances_target =
    if cfg.type == variable.datasource_type.prometheus then
      local selector = common.prometheus_query_filters(
        common.remove_field(cfg.filters, 'alias')
      );
      local gauge = std.format('%starantool_instance_status', [cfg.metrics_prefix]);
      prometheus.target(
        expr=(if selector == '' then gauge else std.format('%s{%s}', [gauge, selector])),
        format='table',
        instant=true,
      )
    else
      error 'InfluxDB target is not supported yet';

  // Text/color shown in the 'status' column for each raw metric value.
  local status_mappings = [
    {
      type: 'value',
      options: {
        '0': { color: 'red', text: 'down' },
        '1': { color: 'green', text: 'alive' },
      },
    },
    {
      type: 'special',
      options: {
        match: 'nan',
        result: { color: 'red', text: 'unknown' },
      },
    },
  ];

  // Per-column overrides: render 'status' as colored text using the mappings.
  local status_override = {
    matcher: { id: 'byName', options: 'status' },
    properties: [
      { id: 'custom.cellOptions', value: { type: 'color-text' } },
      { id: 'mappings', value: status_mappings },
    ],
  };

  // First pass: hide service labels and rename the remaining columns.
  local hide_and_rename = {
    id: 'organize',
    options: {
      excludeByName: {
        Time: true,
        __name__: true,
        job: true,
        exported_job: true,
        endpoint: true,
        namespace: true,
        pod: true,
        service: true,
        instance: true,
      },
      renameByName: {
        alias: 'coordinator uuid',
        exported_instance: 'instance',
        Value: 'status',
      },
    },
  };

  // Second pass: order the columns by their names from the first pass. Kept as
  // a separate transformation because it refers to the renamed columns.
  local reorder = {
    id: 'organize',
    options: {
      indexByName: {
        'coordinator uuid': 0,
        replicaset: 1,
        instance: 2,
        status: 3,
      },
    },
  };

  tablePanel.new(
    title=title,
    datasource=cfg.datasource,
    transform='table',
  ).addTarget(instances_target) {
    options: { cellHeight: 'sm', showHeader: true },
    fieldConfig: {
      defaults: {
        custom: {
          align: 'auto',
          cellOptions: { type: 'auto' },
          footer: { reducers: [] },
          inspect: false,
        },
        mappings: [],
        thresholds: {
          mode: 'absolute',
          steps: [
            { color: 'red', value: null },
            { color: 'green', value: 1 },
          ],
        },
      },
      overrides: [status_override],
    },
    transformations: [hide_and_rename, reorder],
  },
14200

15201
health_overview_table(
16202
cfg,

dashboard/section.libsonnet

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ local vinyl = import 'dashboard/panels/vinyl.libsonnet';
5959
cluster.election_term(cfg),
6060
],
6161

62+
// Failover coordinator section: a row header followed by two tables placed
// side by side (coordinator status on the left, instances seen by coordinators
// on the right). Yields an empty list for non-Prometheus datasources.
failover_coordinator(cfg)::
  local is_prometheus = cfg.type == variable.datasource_type.prometheus;
  // The widths 10 + 14 place the two tables next to each other at the same y.
  local status_pos = { w: 10, h: 14, x: 0, y: 3 };
  local instances_pos = { w: 14, h: 14, x: 10, y: 3 };
  if !is_prometheus then [] else [
    cluster.failover_coordinator_row,
    cluster.coordinators_status(cfg) { gridPos: status_pos },
    cluster.instances_seen_by_coordinators(cfg) { gridPos: instances_pos },
  ],
67+
6268
cluster_cartridge(cfg):: if cfg.type == variable.datasource_type.prometheus then [
6369
// Must be used only in the top of a dashboard, overall stat panels use complicated layout
6470
cluster.row,

tests/Prometheus/dashboard_tarantool3.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ filters:
88
alias: ['=~', '$alias']
99
sections:
1010
- cluster_tarantool3
11+
- failover_coordinator
1112
- replication_tarantool3
1213
- http
1314
- net

0 commit comments

Comments
 (0)