From b540dd98c1381d297eb6520a90c890bcf8c1592f Mon Sep 17 00:00:00 2001 From: Vladislav Grubov Date: Tue, 3 Oct 2023 19:12:27 +0300 Subject: [PATCH] feat: support config.enforce_ro() * This method is needed mainly by autofailover mechanisms to enforce read_only=true during the loading phase of tarantool. Before this patch, a race condition existed between autofailover and the recovery behaviour of moonlibs/config. If the master crashes but quickly restarts, it enters a long-running loading phase. The master recovers as read_only=true, but after returning from box.cfg moonlibs/config retrieves the config from ETCD and rechecks the read_only option. The race happens when autofailover changes the configuration in ETCD, but the master returns from the loading phase just in time to apply the old configuration. This leads the cluster to split-brain. With the config.enforce_ro method, an external coordinator can first call enforce_ro on the loading leader and receive confirmation that the leader will not be promoted to rw until the next configuration reload. tarantool can be enforced to be ro only when all of the following conditions are met: 1) Tarantool is recovering from a snapshot (it was already bootstrapped) 2) Client code does not override box.cfg by passing args.boxcfg 3) args.tidy_load is enabled (the default, but it can be overridden by the client) 4) config uses ETCD to retrieve the topology. 
--- config.lua | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/config.lua b/config.lua index d6a9862..5ffb25c 100644 --- a/config.lua +++ b/config.lua @@ -665,7 +665,7 @@ local function etcd_load( M, etcd_conf, local_cfg ) " with replication:"..table.concat(cfg.box.replication,", "), string.format("timeout: %s, quorum: %s, lag: %s", cfg.box.replication_connect_timeout - or ('def:%s'):format(load_cfg.default_cfg.replication_connect_quorum or 30), + or ('def:%s'):format(load_cfg.default_cfg.replication_connect_timeout or 30), cfg.box.replication_connect_quorum or 'def:full', cfg.box.replication_sync_lag or ('def:%s'):format(load_cfg.default_cfg.replication_sync_lag or 10) @@ -751,6 +751,7 @@ end ---@field public _load_cfg table ---@field public _flat table ---@field public _fencing_f? Fiber +---@field public _enforced_ro? boolean ---@operator call(moonlibs.config.opts): moonlibs.config ---@type moonlibs.config @@ -779,7 +780,18 @@ local M return end end - end + end, + enforce_ro = function() + if not M._ro_enforcable then + return false, 'cannot enforce readonly' + end + M._enforced_ro = true + return true, { + info_ro = box.info.ro, + cfg_ro = box.cfg.read_only, + enforce_ro = M._enforced_ro, + } + end, },{ ---Reinitiates moonlibs.config ---@param args moonlibs.config.opts @@ -877,6 +889,8 @@ local M -- subject to change, just a PoC local etcd_conf = args.etcd or cfg.etcd + -- we can enforce ro during recovery only if we have etcd config + M._ro_enforcable = M._ro_enforcable and etcd_conf ~= nil if etcd_conf then local s = clock.time() cfg = etcd_load(M, etcd_conf, cfg) @@ -912,6 +926,9 @@ local M return cfg end + -- We cannot enforce ro if any of these conditions is not satisfied: + -- Tarantool must be bootstrapping with tidy_load and the client must not override box.cfg via args.boxcfg + M._ro_enforcable = args.boxcfg == nil and args.tidy_load and type(box.cfg) == 'function' local cfg = load_config() --[[@as table]] M._flat = flatten(cfg) 
@@ -1014,6 +1031,12 @@ local M log.info("Reloading config after start") local new_cfg = load_config() + if M._enforced_ro then + log.info("Enforcing RO (should be ro=%s) because told to", new_cfg.box.read_only) + new_cfg.box.read_only = true + end + M._enforced_ro = nil + M._ro_enforcable = false local diff_box = value_diff(cfg.box, new_cfg.box) -- since load_config loads config also for reloading it removes non-dynamic options