From 78326efb4638018231fd9b60891112235d54d089 Mon Sep 17 00:00:00 2001 From: Vladislav Grubov Date: Thu, 8 Dec 2022 18:05:30 +0300 Subject: [PATCH] auto step down when ETCD is not reachable * This patch introduces background fiber config._fencing_f which auto enables on rw nodes and watches ETCD * It is executed after on_after_cfg callback and watches //clusters//master path in ETCD during etcd.fencing_timeout (default: 10s) * Fiber is enabled only in topology etcd.cluster.master and only if etcd.fencing_enabled flag is specified (in conf.lua or in common config) * Since network drops are indistinguishable from HTTP timeouts after each :wait() time out, fencing rechecks ETCD via :list() method * If nothing changed (99% chance) fiber will try again after fencing_timeout * If another node is specified in ETCD, node will automatically steps down (executes box.cfg{read_only=true}) and will wait to become master * fencing fiber never returns node to be rw (it never calls box.cfg{read_only=false} or package.reload()) * to bring instance back rw you should manually call package.reload() or use switchover. --- README.md | 544 +++++++++++++++++++++++++++++++++++++--- config.lua | 197 +++++++++++++++ test/Dockerfile | 6 + test/app/conf.lua | 19 ++ test/app/init.lua | 45 ++++ test/docker-compose.yml | 60 +++++ test/instance.etcd.yaml | 26 ++ test/net/Makefile | 22 ++ test/net/README.md | 81 ++++++ 9 files changed, 967 insertions(+), 33 deletions(-) create mode 100644 test/Dockerfile create mode 100644 test/app/conf.lua create mode 100644 test/app/init.lua create mode 100644 test/docker-compose.yml create mode 100644 test/instance.etcd.yaml create mode 100644 test/net/Makefile create mode 100644 test/net/README.md diff --git a/README.md b/README.md index 66ebaef..e5befb9 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,539 @@ -Having conf.lua +# Config + +Module to make proper initialization and configuration of tarantool instance. + +It can be used with or without ETCD. + +Only ETCD APIv2 now supported. + +## Status + +Ready for production use. + +Latest stable release: `config 0.6.0`. + +## Installation + +```bash +tarantoolctl rocks --server=https://moonlibs.org install config 0.6.0 +``` + +Starting with Tarantool 2.10.0 you may add configuration of moonlibs.org into `config-5.1.lua` + +```bash +$ cat .rocks/config-5.1.lua +rocks_servers = { + "https://moonlibs.org", + "http://moonlibs.github.io/rocks", + "http://rocks.tarantool.org/", + "http://luarocks.org/repositories/rocks" +} +``` + +## Configuration + +To configure tarantool instance you must deploy `conf.lua` file. + +### Example of `conf.lua` + +Typically conf.lua should be located in `/etc//conf.lua`. ```lua +assert(instance_name, "instance_name must be defined") +etcd = { + instance_name = instance_name, + prefix = '/etcd/path/to/application/etcd', + endpoints = { + "https://etcd1:2379", + "https://etcd2:2379", + "https://etcd3:2379", + }, + timeout = 3, + boolean_auto = true, + print_config = true, + login = 'etcd-username', + password = 'etcd-password', +} + +-- This options will be passed as is to box.cfg box = { - work_dir = '.'; - pid_file = 'box.pid'; - custom_proc_title = 'm1'; - background = false; - slab_alloc_arena = 0.1; - --- Networking. Dynamic --- - listen = '127.0.0.1:3013', - readahead = 65536, + pid_file = '/var/run/tarantool/'..instance_name..'.pid', + memtx_dir = '/var/lib/tarantool/snaps/' .. instance_name, + wal_dir = '/var/lib/tarantool/xlogs/' .. 
instance_name,
+    log_nonblock = false,
 }
-console = {
-    listen = '127.0.0.1:3016'
+
+--- You may hardcode options for your application in the `app` section
+app = {
+
 }
-include 'app.lua'
 ```
-and app.lua:
+### Usage in `init.lua`

 ```lua
-app = {
-    pool = {
-        { uri = '127.0.0.1:3013', zone = '1' };
-        { uri = '127.0.0.2:3013', zone = '2' };
-        { uri = '127.0.0.3:3013', zone = '3' };
-    }
+
+local instance_name = os.getenv('TT_INSTANCE_NAME')
+
+require 'config' {
+    mkdir = true,
+    instance_name = instance_name,
+    file = '/etc//conf.lua',
+    master_selection_policy = 'etcd.cluster.master',
+}
+
+print("Tarantool bootstrapped")
+```
+
+## Usage

+The config module is used both to bootstrap and to configure your Tarantool application.
+
+Inside the application you can access config options with the following syntax:
+
+```lua
+local DEFAULT_TIMEOUT = 3
+
+--- If app/http/timeout is defined in the config (ETCD or conf.lua) it is returned,
+--- otherwise the value of DEFAULT_TIMEOUT is returned.
+local http_timeout = config.get('app.http.timeout', DEFAULT_TIMEOUT)
+
+--- If app/is_enabled is not defined then `nil` is returned.
+local is_enabled = config.get('app.is_enabled')
+```
+
+## Topologies
+
+`moonlibs/config` supports different types of Tarantool topologies.
+
+All of them make sense when the application is configured via ETCD.
+
+The option `master_selection_policy` is used to distinguish the application topology.
+
+### Single-shard topology
+
+In most cases you need a single-shard topology: your application has a single master and many replicas.
+
+The shard is configured with a full-mesh topology. Read more about full-mesh replication on the [Tarantool website](https://www.tarantool.io/en/doc/latest/concepts/replication/repl_architecture/).
+
+Each instance of the application must have a unique name. For example:
+
+- `userdb_001`
+- `userdb_002`
+- `userdb_003`
+
+Typically the instance name **should not** contain the word `master` or `replica`.
+
+#### Example of `init.lua`
+
+```lua
+--- The variable instance_name must be derived somehow for each tarantool instance,
+--- for example from the name of the file or from an environment variable.
+require 'config' {
+    mkdir = true,
+    instance_name = instance_name,
+    file = '/etc/userdb/conf.lua',
+    master_selection_policy = 'etcd.cluster.master',
+}
+```
+
+#### Example of `/etc/userdb/conf.lua`
+
+```lua
+assert(instance_name, "instance_name must be defined")
+etcd = {
+    instance_name = instance_name,
+    prefix = '/tarantool/userdb',
+    endpoints = {
+        "https://etcd1:2379",
+        "https://etcd2:2379",
+        "https://etcd3:2379",
+    },
+    timeout = 3,
+    boolean_auto = true,
+    print_config = true,
 }
+-- These options are passed as-is to box.cfg
+box = {
+    pid_file = '/var/run/tarantool/'..instance_name..'.pid',
+    memtx_dir = '/var/lib/tarantool/snaps/' .. instance_name,
+    wal_dir = '/var/lib/tarantool/xlogs/' .. instance_name,
+    log_nonblock = false,
+}
+```
+
+#### Example of ETCD configuration (`etcd.cluster.master`)
+
+```yaml
+tarantool:
+  userdb:
+    clusters:
+      userdb:
+        master: userdb_001
+        replicaset_uuid: 045e12d8-0001-0000-0000-000000000000
+    common:
+      box:
+        log_level: 5
+        memtx_memory: 268435456
+    instances:
+      userdb_001:
+        cluster: userdb
+        box:
+          instance_uuid: 045e12d8-0000-0001-0000-000000000000
+          listen: 10.0.1.11:3301
+      userdb_002:
+        cluster: userdb
+        box:
+          instance_uuid: 045e12d8-0000-0002-0000-000000000000
+          listen: 10.0.1.12:3302
+      userdb_003:
+        cluster: userdb
+        box:
+          instance_uuid: 045e12d8-0000-0003-0000-000000000000
+          listen: 10.0.1.13:3303
+```
+
+`/tarantool/userdb` -- the root path of the application configuration.
+
+`/tarantool/userdb/common` -- configuration shared by every instance of the application.
+
+`/tarantool/userdb/common/box` -- the section with box.cfg parameters. See more on the [Tarantool website](https://www.tarantool.io/en/doc/latest/reference/configuration).
+
+`/tarantool/userdb/clusters` -- the section with the list of shards. For a single-shard application it is convenient to name the single shard after the application itself.
+
+`/tarantool/userdb/instances` -- the section with instance-specific configuration. It must contain the `/box/{listen,instance_uuid}` and `cluster` options.
+
+##### Configuration precedence
+
+- /etc/app-name/conf.lua
+- ETCD:/instances/
+- ETCD:/common/
+- config.get default value
+
+#### Fencing configuration
+
+The `etcd.cluster.master` topology supports an auto-fencing mechanism.
+
+Auto fencing is implemented via a background fiber which waits for changes on the `/clusters/` directory.
+
+There are 4 parameters to configure:
+
+| Parameter                        | Description                            | Default Value       |
+|----------------------------------|----------------------------------------|---------------------|
+| `etcd/fencing_enabled`           | Trigger to enable/disable fencing      | `false`             |
+| `etcd/fencing_timeout`           | Fencing timeout                        | `10` (seconds)      |
+| `etcd/fencing_pause`             | Fencing pause                          | `fencing_timeout/2` |
+| `etcd/fencing_check_replication` | Respect replication when ETCD is down  | `false`             |
+
+Example of enabled fencing:
+
+```yaml
+tarantool:
+  userdb:
+    common:
+      etcd:
+        fencing_enabled: true
 ```
-in init.lua
+Fencing can also be enabled in `conf.lua`:

 ```lua
-local conf = require 'config' ('conf.lua') -- call to conf loads config
+etcd = {
+    endpoints = {"http://etcd1:2379", "http://etcd2:2379", "http://etcd3:2379"},
+    prefix = "/tarantool/userdb",
+    timeout = 3,
+    fencing_enabled = true,
+}
+```
+
+#### Fencing algorithm
+
+Fencing can be enabled only for the `etcd.cluster.master` topology and only if `etcd/fencing_enabled` is `true` (default: `false`).
-local pool = conf.get('app.pool',{})
+
+The fencing algorithm is as follows (see the condensed sketch below):
+
+0. Wait until the instance becomes `rw`.
+1. Wait a randomized `fencing_pause` (fencing_pause ± 500ms).
+2. Recheck ETCD `/clusters/` within `fencing_timeout`.
+3. Depending on the response:
+   1. [ETCD is ok] => consider self `rw` for the next `fencing_timeout` seconds. Go to `1.`
+   2. [ETCD is down] => execute `box.cfg{read_only=true}` if `etcd/fencing_check_replication` is disabled. Go to `0.`
+   3. [ETCD has another master, and switching is in progress] => do nothing. Go to `1.`
+   4. [ETCD has another master, and switching is not in progress] => execute `box.cfg{read_only=true}`. Go to `0.`
+
+**Note:** quorum reads are used for ETCD requests, so the check is safe even during a split brain.
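+
+A condensed sketch of the loop (simplified; the real implementation in `config.lua`
+below additionally tracks ETCD watch indexes, fiber generations and the optional
+replication fallback; `fencing_check()` stands for the ETCD recheck described in
+step `3.`):
+
+```lua
+while true do
+    -- 0. wait until this instance becomes rw
+    while box.info.ro do pcall(box.ctl.wait_rw, 3) end
+
+    local deadline = fiber.time() + fencing_timeout
+    repeat
+        -- 1. randomized pause so that replicas do not query ETCD in lockstep
+        fiber.sleep(math.random(math.max(0.5, fencing_pause - 0.5), fencing_pause + 0.5))
+        if box.info.ro then break end
+        -- 2./3. a successful ETCD check extends the leadership lease
+        if fencing_check(deadline) then
+            deadline = fiber.time() + fencing_timeout
+        end
+    until box.info.ro or fiber.time() > deadline
+
+    -- the lease has expired or ETCD names another master: step down
+    if not box.info.ro then
+        box.cfg{ read_only = true }
+    end
+end
+```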
+
+### Multi-proxy topology (etcd.instance.single)
+
+`moonlibs/config` also supports a multi-proxy topology.
This topology is usefull when you need to have many stateless tarantool proxies or totally independent masters. + +Each instance **should** have unique name. For example: + +- proxy_001 +- proxy_002 +- proxy_003 +- proxy_004 +- proxy_005 + +#### Example of proxy `init.lua` + +```lua +--- variable instance_name must be derived somehow for each tarantool instance +--- For example from name of the file. or from environment variable +require 'config' { + mkdir = true, + instance_name = instance_name, + file = '/etc/proxy/conf.lua', + master_selection_policy = 'etcd.instance.single', +} ``` -or anywhere in application module +#### Example of `/etc/proxy/conf.lua` ```lua -local conf = require 'config' +assert(instance_name, "instance_name must be defined") +etcd = { + instance_name = instance_name, + prefix = '/tarantool/proxy', + endpoints = { + "https://etcd1:2379", + "https://etcd2:2379", + "https://etcd3:2379", + }, + timeout = 3, + boolean_auto = true, + print_config = true, +} + +-- This options will be passed as is to box.cfg +box = { + pid_file = '/var/run/tarantool/'..instance_name..'.pid', + memtx_dir = '/var/lib/tarantool/snaps/' .. instance_name, + wal_dir = '/var/lib/tarantool/xlogs/' .. instance_name, + log_nonblock = false, +} +``` -local pool = conf.get('app.pool',{}) +#### Example of ETCD configuration (`etcd.instance.single`) + +```yaml +tarantool: + proxy: + common: + box: + log_level: 5 + memtx_memory: 33554432 + instances: + proxy_001: + box: + instance_uuid: 01712087-0000-0001-0000-000000000000 + listen: 10.0.2.12:7101 + proxy_002: + box: + instance_uuid: 01712087-0000-0002-0000-000000000000 + listen: 10.0.2.13:7102 + proxy_003: + box: + instance_uuid: 01712087-0000-0003-0000-000000000000 + listen: 10.0.2.11:7103 ``` -then we could run +The etcd configuration is the same as `etcd.cluster.master` except that `/tarantool/proxy/clusters` is not defined. + +Also `/tarantool/proxy/instances//cluster` **must not** be defined. + +### Multi-shard topology for custom sharding (`etcd.cluster.master`) + +`etcd.cluster.master` can be used for multi-shard topologies as well. -```sh -tarantool init.lua -# runs tarantool with conf.lua +Multi-shard means that application consists of several replicasets. Each replicaset has single master and several replicas. + +`conf.lua` and `init.lua` files remains exactly the same. But configuration of ETCD slightly changes: + +```yaml +tarantool: + notifications: + clusters: + notifications_002: + master: notifications_002_01 + replicaset_uuid: 11079f9c-0002-0000-0000-000000000000 + notifications_001: + master: notifications_001_01 + replicaset_uuid: 11079f9c-0001-0000-0000-000000000000 + common: + box: + log_level: 5 + memtx_memory: 268435456 + instances: + notifications_001_01: + cluster: notifications_001 + box: + instance_uuid: 11079f9c-0001-0001-0000-000000000000 + listen: 10.0.3.11:4011 + notifications_001_02: + cluster: notifications_001 + box: + instance_uuid: 11079f9c-0001-0002-0000-000000000000 + listen: 10.0.3.12:4012 + notifications_002_01: + cluster: notifications_002 + box: + instance_uuid: 11079f9c-0002-0001-0000-000000000000 + listen: 10.0.3.11:4021 + notifications_002_02: + cluster: notifications_002 + box: + instance_uuid: 11079f9c-0002-0002-0000-000000000000 + listen: 10.0.3.12:4022 ``` -or +This configuration describes configuration of application `notifications` with 2 replicasets `notifications_001` and `notifications_002`. 
+ +Shard `notifications_001` contains 2 nodes: + +- `notifications_001_01` - described as master +- `notifications_001_02` -```sh -tarantool -c cf1.lua init.lua -# runs tarantool with cf1.lua +Shard `notifications_002` contains 2 nodes: + +- `notifications_002_01` - described as master +- `notifications_002_02` + +### Multi-shard topology for vshard-based applications (`etcd.cluster.vshard`) + +In most cases for multi-shard applications it is better to use module [tarantool/vshard](https://www.tarantool.io/en/doc/latest/concepts/sharding). + +vshard required to be properly configured. Each instance of the cluster must contain the same view of cluster topology. + +vshard application has 2 groups of instances: storages (data nodes) and routers (stateless proxy nodes). + +#### Example of ETCD configuration for vshard-based applications (`etcd.cluster.vshard`) + +```yaml +tarantool: + profile: + common: + vshard: + bucket_count: 30000 + box: + log_level: 5 + replication_connect_quorum: 2 + clusters: + profile_001: + master: profile_001_01 + replicaset_uuid: 17120f91-0001-0000-0000-000000000000 + profile_002: + master: profile_002_01 + replicaset_uuid: 17120f91-0002-0000-0000-000000000000 + instances: + profile_001_01: + cluster: profile_001 + box: + instance_uuid: 17120f91-0001-0001-0000-000000000000 + listen: 10.0.4.11:4011 + profile_001_02: + cluster: profile_001 + box: + instance_uuid: 17120f91-0001-0002-0000-000000000000 + listen: 10.0.4.12:4012 + profile_002_01: + cluster: profile_002 + box: + instance_uuid: 17120f91-0002-0001-0000-000000000000 + listen: 10.0.4.11:4021 + profile_002_02: + cluster: profile_002 + box: + instance_uuid: 17120f91-0002-0002-0000-000000000000 + listen: 10.0.4.12:4022 + router_001: + router: true + box: + instance_uuid: 12047e12-0000-0001-0000-000000000000 + listen: 10.0.5.12:7001 + router_002: + router: true + box: + instance_uuid: 12047e12-0000-0002-0000-000000000000 + listen: 10.0.5.13:7002 + router_003: + router: true + box: + instance_uuid: 12047e12-0000-0003-0000-000000000000 + listen: 10.0.5.11:7003 ``` + +#### Example of vshard-based init.lua (`etcd.cluster.vshard`) + +The code of simultanious bootstrap is tricky, and short safe version of it listed below + +```lua +local fun = require 'fun' +--- variable instance_name must be derived somehow for each tarantool instance +--- For example from name of the file. 
or from an environment variable.
+require 'config' {
+    mkdir = true,
+    instance_name = instance_name,
+    file = '/etc/profile/conf.lua',
+    master_selection_policy = 'etcd.cluster.vshard',
+    on_load = function(conf, cfg)
+        -- on_load is called each time, right after the data is fetched from ETCD
+        local all_cfg = conf.etcd:get_all()
+
+        -- Construct the vshard `sharding` table from ETCD
+        cfg.sharding = fun.iter(all_cfg.clusters)
+            :map(function(shard_name, shard_info)
+                return shard_info.replicaset_uuid, {
+                    replicas = fun.iter(all_cfg.instances)
+                        :grep(function(instance_name, instance_info)
+                            return instance_info.cluster == shard_name
+                        end)
+                        :map(function(instance_name, instance_info)
+                            return instance_info.box.instance_uuid, {
+                                name = instance_name,
+                                uri = 'guest:@'..instance_info.box.listen,
+                                master = instance_name == shard_info.master,
+                            }
+                        end)
+                        :tomap()
+                }
+            end)
+            :tomap()
+    end,
+    on_after_cfg = function(conf, cfg)
+        -- on_after_cfg is called once, after box.cfg returns (Tarantool is already online)
+        if cfg.cluster then
+            vshard.storage.cfg({
+                sharding = cfg.sharding,
+                bucket_count = config.get('vshard.bucket_count'),
+            }, box.info.uuid)
+        end
+        if cfg.router then
+            vshard.router.cfg({
+                sharding = cfg.sharding,
+                bucket_count = config.get('vshard.bucket_count'),
+            })
+        end
+    end,
+}
+```
+
+#### VShard Maintenance
+
+By default vshard does not provide master auto-discovery: if you switch the master in any replicaset, you have to reconfigure the routers as well (see the `package.reload` example below).
+
+With a vshard topology it is strongly recommended to use [package.reload](https://github.com/moonlibs/package-reload). The module must be required before the first require of `config`.
+
+```lua
+require 'package.reload'
+-- ....
+require 'config' {
+    -- ...
+}
+-- ...
+```
+
+It is good to use [switchover](https://gitlab.com/ochaton/switchover) to maintain sharded applications.
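+
+With the callbacks from the example above, re-reading the configuration is enough
+to pick up a changed master: `on_load` rebuilds `cfg.sharding` from ETCD and
+`on_after_cfg` re-applies the vshard configuration. A minimal sketch (assuming
+`package.reload` is in use, as recommended above):
+
+```lua
+-- on a router (or storage), after the master has been changed in ETCD:
+package.reload() -- re-reads ETCD and re-runs on_load/on_after_cfg
+```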
+ +To get used to vshard please read getting started of it [Sharding with Vshard](https://www.tarantool.io/en/doc/latest/book/admin/vshard_admin/#vshard-install) diff --git a/config.lua b/config.lua index 9577f0d..07b62c3 100644 --- a/config.lua +++ b/config.lua @@ -3,6 +3,7 @@ local fio = require 'fio' local json = require 'json' local yaml = require 'yaml' local digest = require 'digest' +local fiber = require 'fiber' json.cfg{ encode_invalid_as_nil = true } local function lookaround(fun) @@ -922,6 +923,202 @@ local M end -- print(string.format("Box configured")) + local msp = config.get('sys.master_selection_policy') + if type(cfg.etcd) == 'table' + and config.get('etcd.fencing_enabled') + and msp == 'etcd.cluster.master' + and type(cfg.cluster) == 'string' and cfg.cluster ~= '' + then + M._fencing_f = fiber.create(function() + fiber.name('config/fencing') + fiber.yield() -- yield execution + local function in_my_gen() fiber.testcancel() return config._fencing_f == fiber.self() end + assert(cfg.cluster, "cfg.cluster must be defined") + + local watch_path = fio.pathjoin( + config.get('etcd.prefix'), + 'clusters', + cfg.cluster + ) + + local my_name = assert(config.get('sys.instance_name'), "instance_name is not defined") + local fencing_timeout = config.get('etcd.fencing_timeout', 10) + local fencing_pause = config.get('etcd.fencing_pause', fencing_timeout/2) + local fencing_check_replication = config.get('etcd.fencing_check_replication') + if type(fencing_check_replication) == 'string' then + fencing_check_replication = fencing_check_replication == 'true' + else + fencing_check_replication = fencing_check_replication == true + end + + local etcd_cluster, watch_index + + local function refresh_list() + local result, resp = config.etcd:list(watch_path) + if resp.status == 200 then + etcd_cluster = result + if type(resp.headers) == 'table' + and tonumber(resp.headers['x-etcd-index']) + and tonumber(resp.headers['x-etcd-index']) > (tonumber(watch_index) or 0) + then + watch_index = tonumber(resp.headers['x-etcd-index']) + end + end + return etcd_cluster, watch_index + end + + local function fencing_check(deadline) + local timeout = math.min(deadline-fiber.time(), fencing_timeout) + local check_started = fiber.time() + local pcall_ok, err_or_resolution, new_cluster = pcall(function() + local not_timed_out, response = config.etcd:wait(watch_path, { + index = watch_index, + timeout = timeout, + }) + + -- http timed out / our network drop - we'll never know + if not not_timed_out then return 'timeout' end + local res = json.decode(response.body) + + if type(response.headers) == 'table' + and tonumber(response.headers['x-etcd-index']) + and tonumber(response.headers['x-etcd-index']) > watch_index + then + watch_index = tonumber(response.headers['x-etcd-index']) + end + + if res.node then + return 'changed', config.etcd:recursive_extract(watch_path, res.node) + end + end) + + if not pcall_ok then + log.warn("ETCD watch failed: %s", err_or_resolution) + end + + if err_or_resolution ~= 'changed' then + new_cluster = nil + end + + if not new_cluster then + deadline = deadline+fencing_timeout + while fiber.time() < deadline and in_my_gen() do + local ok, e_cluster = pcall(refresh_list) + if ok and e_cluster then + new_cluster = e_cluster + break + end + if not in_my_gen() then return end + fiber.sleep(fencing_pause / 10) + end + end + + if not in_my_gen() then return end + + if type(new_cluster) ~= 'table' then -- ETCD is down + log.warn('[fencing] ETCD %s is not discovered in etcd during %s 
seconds',
+						watch_path, fiber.time()-check_started)
+
+					if not fencing_check_replication then
+						return false
+					end
+
+					-- With strict fencing we would have to step down as soon as we
+					-- discover that the coordinator is down. But in the real world the
+					-- coordinator may be unreachable for several seconds because of a
+					-- network hiccup or a problem in ETCD itself.
+					-- We assume it is safe not to step down as long as we are connected
+					-- to all replicas in the replicaset (etcd.cluster.master is a
+					-- full-mesh topology).
+					-- We do not check downstreams here, because downstreams cannot lead
+					-- to collisions.
+					-- If at least 1 upstream is not in the 'follow' state
+					-- (Tarantool replication verifies it with tcp healthchecks once per
+					-- box.cfg.replication_timeout), we step down immediately.
+					for _, ru in pairs(box.info.replication) do
+						if ru.id ~= box.info.id and ru.upstream then
+							if ru.upstream.status ~= "follow" then
+								log.warn("[fencing] upstream %s is not followed by me %s:%s (idle: %s, lag:%s)",
+									ru.upstream.peer, ru.upstream.status, ru.upstream.message,
+									ru.upstream.idle, ru.upstream.lag
+								)
+								return false
+							end
+						end
+					end
+
+					log.warn('[fencing] ETCD is down but all upstreams are followed by me. Continuing leadership')
+					return true
+				elseif new_cluster.master == my_name then
+					-- The most common branch: we are registered as the leader.
+					return true
+				elseif new_cluster.switchover then -- new_cluster.master ~= my_name
+					-- Another instance is the leader in ETCD, but we could be the one
+					-- who becomes the next leader (the cluster is being switched right now).
+					-- It is almost impossible to hit this path in production; the only
+					-- protection we have is `fencing_pause` and `fencing_timeout`.
+					-- So we do nothing while the ETCD switchover mutex is present.
+					log.warn('[fencing] It seems that cluster is under switchover right now %s', json.encode(new_cluster))
+					-- (if we are ro -- then we must end the loop)
+					-- (if we are rw -- then we must continue the loop)
+					return not box.info.ro
+				else
+					log.warn('[fencing] ETCD %s/master is %s not us. Stepping down', watch_path, new_cluster.master)
+					return false
+				end
+			end
+
+			if not pcall(refresh_list) then
+				log.warn("etcd list failed")
+			end
+			log.info("etcd_master is %s (index: %s)", json.encode(etcd_cluster), watch_index)
+
+			-- Main fencing loop.
+			-- It is executed on every replica in the shard:
+			-- while the instance is ro it simply waits for it to become rw.
+			while in_my_gen() do
+				-- wait until the instance becomes rw
+				while box.info.ro and in_my_gen() do
+					-- this is just a fancy sleep: if the node becomes rw in less than
+					-- 3 seconds we notice it immediately
+					pcall(box.ctl.wait_rw, 3)
+				end
+
+				-- after waiting to become rw we step into the fencing loop;
+				-- we must check that we are still in our code generation to proceed
+				if not in_my_gen() then return end
+
+				-- we will not step down until the deadline
+				local deadline = fiber.time()+fencing_timeout
+				repeat
+					-- before the ETCD check we pause for a slightly randomized
+					-- interval, so replicas do not spam ETCD in lockstep
+					fiber.sleep(math.random(math.max(0.5, fencing_pause-0.5), fencing_pause+0.5))
+					-- after each yield we have to check that we are still in our generation
+					if not in_my_gen() then return end
+
+					-- someone made us read-only; there is no need to check ETCD,
+					-- we break out of this loop immediately
+					if box.info.ro then break end
+
+					-- if fencing_check(deadline) returns true,
+					-- we renew the leadership lease
+					if fencing_check(deadline) then
+						-- update the deadline:
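+						-- (a successful check grants another fencing_timeout seconds of
+						--  leadership; failed checks simply let the deadline run out)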
+ deadline = fiber.time()+fencing_timeout + end + + if not in_my_gen() then return end + until box.info.ro or fiber.time() > deadline + + -- We have left deadline-loop. It means that fencing is required + if not box.info.ro then + log.warn('[fencing] Performing self fencing (box.cfg{read_only=true})') + box.cfg{read_only=true} + end + end + end) + end + return M end }) diff --git a/test/Dockerfile b/test/Dockerfile new file mode 100644 index 0000000..ac744bc --- /dev/null +++ b/test/Dockerfile @@ -0,0 +1,6 @@ +FROM tarantool/tarantool:2.10 +RUN apk add --no-cache -u iproute2 make bind-tools + +WORKDIR /opt/tarantool + +CMD ["tarantool" "/opt/tarantool/init.lua"] \ No newline at end of file diff --git a/test/app/conf.lua b/test/app/conf.lua new file mode 100644 index 0000000..044e133 --- /dev/null +++ b/test/app/conf.lua @@ -0,0 +1,19 @@ +etcd = { + instance_name = os.getenv("TT_INSTANCE_NAME"), + prefix = '/instance', + endpoints = {"http://etcd:2379"}, + fencing_enabled = true, +} + +box = { + background = false, + log_level = 6, + log_format = 'plain', + + memtx_dir = '/var/lib/tarantool/snaps/', + wal_dir = '/var/lib/tarantool/xlogs', +} + +app = { + +} \ No newline at end of file diff --git a/test/app/init.lua b/test/app/init.lua new file mode 100644 index 0000000..295817d --- /dev/null +++ b/test/app/init.lua @@ -0,0 +1,45 @@ +local fiber = require "fiber" + +require 'config' { + mkdir = true, + print_config = true, + instance_name = os.getenv("TT_INSTANCE_NAME"), + file = 'conf.lua', + master_selection_policy = 'etcd.cluster.master', + + on_after_cfg = function() + if not box.info.ro then + box.schema.user.grant('guest', 'super', nil, nil, { if_not_exists = true }) + + box.schema.space.create('T', {if_not_exists = true}) + box.space.T:create_index('I', { if_not_exists = true }) + end + end, +} + +fiber.create(function() + fiber.name('pusher') + + while true do + repeat + pcall(box.ctl.wait_rw, 3) + fiber.testcancel() + until not box.info.ro + + local fibers = {} + for _ = 1, 10 do + local f = fiber.create(function() + fiber.self():set_joinable(true) + for i = 1, 100 do + box.space.T:replace{i, box.info.id, box.info.vclock} + end + end) + table.insert(fibers, f) + end + + for _, f in ipairs(fibers) do + f:join() + end + end +end) + diff --git a/test/docker-compose.yml b/test/docker-compose.yml new file mode 100644 index 0000000..4833b68 --- /dev/null +++ b/test/docker-compose.yml @@ -0,0 +1,60 @@ +version: "3" + +x-etcd: &etcd + image: quay.io/coreos/etcd:v2.3.8 + container_name: etcd + networks: + - tarantool + environment: + ETCD_LISTEN_PEER_URLS: http://0.0.0.0:2380 + ETCD_LISTEN_CLIENT_URLS: http://0.0.0.0:2379 + ETCDCTL_API: 2 + ETCD_INITIAL_CLUSTER_TOKEN: etcd-cluster + ETCD_INITIAL_CLUSTER: etcd=http://etcd:2380 + ETCD_NAME: etcd + ETCD_ADVERTISE_CLIENT_URLS: http://etcd:2379 + ETCD_INITIAL_ADVERTISE_PEER_URLS: http://etcd:2380 + +x-tt: &tt + build: . 
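+  # the repository root is mounted below as the rocks tree, so the containers
+  # run the working-tree version of config.lua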
+  volumes:
+    - $PWD/../:/opt/tarantool/.rocks/share/tarantool:ro
+    - $PWD/app:/opt/tarantool
+    - $PWD/net:/opt/tarantool/net:ro
+  depends_on:
+    etcd:
+      condition: service_started
+  privileged: true
+  networks:
+    - tarantool
+  command: ["/bin/sh", "-c", "sleep 5 && tarantool /opt/tarantool/init.lua"]
+
+networks:
+  tarantool:
+    name: tt_net
+    driver: bridge
+
+services:
+  etcd:
+    <<: *etcd
+  etcd_load:
+    image: registry.gitlab.com/ochaton/switchover:010a6965
+    networks:
+      - tarantool
+    volumes:
+      - $PWD/instance.etcd.yaml:/instance.etcd.yaml:ro
+    depends_on:
+      etcd:
+        condition: service_started
+    entrypoint: ['']
+    command: ["/bin/sh", "-c", "sleep 3 && switchover -v -e http://etcd:2379 etcd load / /instance.etcd.yaml"]
+  instance_01:
+    <<: *tt
+    container_name: instance_01
+    environment:
+      TT_INSTANCE_NAME: instance_01
+  instance_02:
+    <<: *tt
+    container_name: instance_02
+    environment:
+      TT_INSTANCE_NAME: instance_02
diff --git a/test/instance.etcd.yaml b/test/instance.etcd.yaml
new file mode 100644
index 0000000..3e0768b
--- /dev/null
+++ b/test/instance.etcd.yaml
@@ -0,0 +1,26 @@
+---
+instance:
+  clusters:
+    instance:
+      master: instance_01
+      replicaset_uuid: 91157a11-0001-0000-0000-000000000000
+  common:
+    etcd:
+      fencing_timeout: 5
+      fencing_pause: 3
+    box:
+      replication_connect_quorum: 1
+      log_level: 5
+      memtx_memory: 268435456
+  instances:
+    instance_01:
+      cluster: instance
+      box:
+        instance_uuid: 91157a11-0000-0001-0000-000000000000
+        listen: instance_01:3301
+    instance_02:
+      cluster: instance
+      box:
+        instance_uuid: 91157a11-0000-0002-0000-000000000000
+        listen: instance_02:3302
+...
diff --git a/test/net/Makefile b/test/net/Makefile
new file mode 100644
index 0000000..84df839
--- /dev/null
+++ b/test/net/Makefile
@@ -0,0 +1,22 @@
+setup:
+	tc qdisc add dev eth0 root handle 1: prio
+	tc qdisc add dev eth0 parent 1:3 handle 10: netem loss 100%
+
+offline-dport-%:
+	tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip dport $* 0xffff flowid 1:3
+
+offline-dst-%:
+	tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip dst $(shell host -T4 $* | cut -f 4 -d' ') flowid 1:3
+
+online:
+	tc filter del dev eth0 parent 1: protocol ip pref 1 u32
+
+filter:
+	tc -s -d filter show dev eth0
+
+qdisc:
+	tc -d -s qdisc show dev eth0
+
+clear:
+	tc filter del dev eth0 parent 1:
+	tc qdisc del dev eth0 root
diff --git a/test/net/README.md b/test/net/README.md
new file mode 100644
index 0000000..cadbba9
--- /dev/null
+++ b/test/net/README.md
@@ -0,0 +1,81 @@
+# Split-Brain test toolchain
+
+## Run
+
+```bash
+$ pwd
+config/test
+
+$ docker compose up --build
+```
+
+## Prepare
+
+### Prepare instance_01
+
+```bash
+docker exec -it instance_01 /bin/sh
+
+# make setup must be executed only once per container
+/opt/tarantool $ make -C net setup
+```
+
+### Prepare instance_02
+
+```bash
+docker exec -it instance_02 /bin/sh
+
+# make setup must be executed only once per container
+/opt/tarantool $ make -C net setup
+```
+
+## Make online
+
+```bash
+docker exec -it instance_01 /bin/sh
+
+/opt/tarantool $ make -C net online
+```
+
+## Isolation
+
+### Isolate instance_01 against instance_02
+
+```bash
+docker exec -it instance_01 /bin/sh
+
+/opt/tarantool $ make -C net offline-dst-instance_02
+```
+
+### Isolate instance_01 against etcd
+
+```bash
+docker exec -it instance_01 /bin/sh
+
+/opt/tarantool $ make -C net offline-dst-etcd
+```
+
+### Total instance_01 isolation
+
+```bash
+docker exec -it instance_01 /bin/sh
+
+/opt/tarantool $ make -C net offline-dst-instance_02
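+# add the etcd filter on top of the previous one for full isolation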
+/opt/tarantool $ make -C net offline-dst-etcd +``` + +### Split brain instance_01 / instance_02 + +```bash +docker exec -it instance_01 /bin/sh + +/opt/tarantool $ make -C net offline-dst-instance_02 +/opt/tarantool $ make -C net offline-dst-autofailover-2 +``` + +```bash +docker exec -it instance_02 /bin/sh + +/opt/tarantool $ make -C net offline-dst-instance_01 +/opt/tarantool $ make -C net offline-dst-autofailover-1 +```
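+
+## Observe fencing
+
+A quick way to check that fencing kicked in (assumes the compose setup above):
+after isolating the current master from etcd, its log should contain
+`[fencing] Performing self fencing (box.cfg{read_only=true})` within a few
+seconds (see `fencing_timeout` in `instance.etcd.yaml`).
+
+```bash
+docker logs -f instance_01 2>&1 | grep fencing
+```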