Skip to content

Commit c2cf7c7

Browse files
p0rtale authored and oleg-jukovec committed
Add tnt_cartridge_config_checksum metric
1 parent 1053f8c commit c2cf7c7

File tree

6 files changed

+95
-0
lines changed

6 files changed

+95
-0
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010

11+
- `tnt_cartridge_config_checksum` metric.
12+
1113
### Changed
1214

1315
### Fixed

doc/monitoring/api_reference.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ Metrics functions
570570
* ``luajit``
571571
* ``cartridge_issues``
572572
* ``cartridge_failover``
573+
* ``cartridge_config``
573574
* ``clock``
574575
* ``event_loop``
575576
* ``config``

doc/monitoring/metrics_reference.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,10 @@ Cartridge
414414
* - ``tnt_cartridge_failover_trigger_total``
415415
- Count of failover triggers in cluster.
416416

417+
* - ``tnt_cartridge_config_checksum``
418+
- Cartridge configuration checksum on the instance.
419+
Can be used to detect configuration divergence across cluster nodes.
420+
417421
.. _metrics-reference-luajit:
418422

419423
LuaJIT metrics

metrics/cartridge/config.lua

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
local utils = require('metrics.utils')
2+
local collectors_list = {}
3+
4+
--- Refresh the Cartridge configuration checksum gauge.
-- Returns without touching the collectors when the `cartridge` module cannot
-- be required, or when `confapplier.get_active_config()` yields nil.
local function update()
    if not pcall(require, 'cartridge') then
        return
    end

    local active_config = require('cartridge.confapplier').get_active_config()
    if active_config == nil then
        return
    end

    -- The gauge value is whatever the active config reports as its checksum.
    local checksum = active_config:get_checksum()
    collectors_list.config_checksum = utils.set_gauge(
        'cartridge_config_checksum',
        'Cartridge configuration checksum on the instance',
        checksum,
        nil,
        nil,
        {default = true}
    )
end
24+
25+
-- Module interface: `update` refreshes the gauge, `list` exposes the
-- table of collectors registered by this module.
local exports = {
    list = collectors_list,
    update = update,
}

return exports

metrics/tarantool.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ local default_metrics = {
2222
luajit = require('metrics.tarantool.luajit'),
2323
cartridge_issues = require('metrics.cartridge.issues'),
2424
cartridge_failover = require('metrics.cartridge.failover'),
25+
cartridge_config = require('metrics.cartridge.config'),
2526
clock = require('metrics.tarantool.clock'),
2627
event_loop = require('metrics.tarantool.event_loop'),
2728
config = require('metrics.tarantool.config'),

test/integration/cartridge_metrics_test.lua

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,62 @@ g.test_failover = function()
144144
g.cluster.main_server:start()
145145
g.cluster:wait_until_healthy()
146146
end
147+
148+
--- Read `tnt_cartridge_config_checksum` from a server's `/metrics` endpoint.
-- Asserts that the metric is present, has exactly one observation and that
-- the observation's value is a number.
-- @param server  server object providing `http_request`
-- @return the checksum value reported by the server
local function get_config_checksum(server)
    local response = server:http_request('get', '/metrics')
    local observations = utils.find_metric('tnt_cartridge_config_checksum', response.json)

    t.assert(observations, 'tnt_cartridge_config_checksum metric should be present')
    t.assert_equals(#observations, 1)

    local value = observations[1].value
    t.assert_type(value, 'number', 'Checksum should be a number')

    return value
end
160+
161+
--- After a clusterwide config patch the checksum must change on the main
-- instance, and the replica must report the same new checksum.
g.test_config_checksum_match = function()
    local main_server = g.cluster:server('main')
    local replica_server = g.cluster:server('replica')

    local initial_checksum = get_config_checksum(main_server)

    -- Fail right here if the patch is rejected, instead of letting the
    -- ignored result surface later as an unrelated checksum assertion.
    main_server.net_box:eval([[
        local patch = require('cartridge').config_patch_clusterwide
        local ok, err = patch({
            ['text'] = 'text',
        })
        assert(ok, tostring(err))
    ]])

    local updated_checksum = get_config_checksum(main_server)
    t.assert_not_equals(initial_checksum, updated_checksum, 'Config checksum should change after config update')

    local replica_checksum = get_config_checksum(replica_server)
    t.assert_equals(replica_checksum, updated_checksum, 'Nodes should have the same config checksum')
end
180+
181+
--- The checksum must differ between instances once the replica is rolled
-- back to the config it had before the main instance applied a patch.
g.test_config_checksum_mismatch = function()
    local main_server = g.cluster:server('main')
    local replica_server = g.cluster:server('replica')

    -- Stash the replica's current config in an explicitly created global:
    -- a local would not survive until the later eval call.
    replica_server.net_box:eval([[
        local vars = require('cartridge.vars').new('cartridge.confapplier')
        rawset(_G, '__old_config', vars.clusterwide_config)
    ]])

    -- The value is specific to this test so the patch alters the config even
    -- if another test already applied `text = 'text'` to the same cluster
    -- (NOTE(review): whether the cluster is shared between tests is not
    -- visible here - confirm). The result is asserted so that a rejected
    -- patch fails here rather than being silently ignored.
    main_server.net_box:eval([[
        local patch = require('cartridge').config_patch_clusterwide
        local ok, err = patch({
            ['text'] = 'mismatch',
        })
        assert(ok, tostring(err))
    ]])

    -- Put the stashed config back on the replica and drop the temporary
    -- global so it does not leak into later evals.
    replica_server.net_box:eval([[
        local vars = require('cartridge.vars').new('cartridge.confapplier')
        vars.clusterwide_config = rawget(_G, '__old_config')
        rawset(_G, '__old_config', nil)
    ]])

    local main_checksum = get_config_checksum(main_server)
    local replica_checksum = get_config_checksum(replica_server)
    t.assert_not_equals(main_checksum, replica_checksum, 'Nodes should have different config checksum')
end

0 commit comments

Comments
 (0)