Skip to content

Commit a433c74

Browse files
committed
fix rebalance deadlock 3
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
1 parent 9d3a6da commit a433c74

File tree

3 files changed

+31
-8
lines changed

3 files changed

+31
-8
lines changed

control/cephutils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def __init__(self, config):
2525
self.anagroup_list = []
2626
self.rebalance_supported = False
2727
self.rebalance_ana_group = 0
28+
self.num_gws = 0
2829
self.last_sent = time.time()
2930

3031
def execute_ceph_monitor_command(self, cmd):
@@ -58,9 +59,12 @@ def is_rebalance_supported(self):
5859
def get_rebalance_ana_group(self):
5960
return self.rebalance_ana_group
6061

61-
def get_number_created_gateways(self, pool, group):
62+
def get_num_gws(self):
63+
return self.num_gws
64+
65+
def get_number_created_gateways(self, pool, group, caching=True):
6266
now = time.time()
63-
if (now - self.last_sent) < 10 and self.anagroup_list:
67+
if caching and ((now - self.last_sent) < 10) and self.anagroup_list:
6468
self.logger.info(f"Caching response of the monitor: {self.anagroup_list}")
6569
return self.anagroup_list
6670
else:
@@ -77,7 +81,9 @@ def get_number_created_gateways(self, pool, group):
7781
data = json.loads(conv_str)
7882
self.rebalance_supported = True
7983
self.rebalance_ana_group = data.get("rebalance_ana_group", None)
80-
self.logger.debug(f"Rebalance ana_group: {self.rebalance_ana_group}")
84+
self.num_gws = data.get("num gws", None)
85+
self.logger.info(f"Rebalance ana_group: {self.rebalance_ana_group},\
86+
num-gws: {self.num_gws} ")
8187
else:
8288
self.rebalance_supported = False
8389
pos = conv_str.find("[")

control/grpc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1908,7 +1908,7 @@ def namespace_change_load_balancing_group_safe(self, request, context):
19081908
# the local rebalance logic.
19091909
if context:
19101910
grps_list = self.ceph_utils.get_number_created_gateways(
1911-
self.gateway_pool, self.gateway_group)
1911+
self.gateway_pool, self.gateway_group, False)
19121912
if request.anagrpid not in grps_list:
19131913
self.logger.debug(f"Load balancing groups: {grps_list}")
19141914
errmsg = f"{change_lb_group_failure_prefix}: Load balancing group " \

control/rebalance.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(self, gateway_service):
3030
"gateway",
3131
"max_ns_to_change_lb_grp",
3232
8)
33+
self.last_scale_down_ts = time.time()
3334
self.rebalance_event = threading.Event()
3435
self.logger.info(f" Starting rebalance thread: period: {self.rebalance_period_sec},"
3536
f" max number ns to move: {self.rebalance_max_ns_to_change_lb_grp}")
@@ -102,12 +103,13 @@ def find_min_loaded_group_in_subsys(self, nqn, grp_list) -> int:
102103
# and rebalance results will be accurate. Monitor in nvme-gw show response publishes the
103104
# index of ANA group that is currently responsible for rebalance
104105
def rebalance_logic(self, request, context) -> int:
106+
now = time.time()
105107
worker_ana_group = self.ceph_utils.get_rebalance_ana_group()
106108
self.logger.debug(f"Called rebalance logic: current rebalancing ana "
107109
f"group {worker_ana_group}")
108110
ongoing_scale_down_rebalance = False
109111
grps_list = self.ceph_utils.get_number_created_gateways(self.gw_srv.gateway_pool,
110-
self.gw_srv.gateway_group)
112+
self.gw_srv.gateway_group, False)
111113
if not self.ceph_utils.is_rebalance_supported():
112114
self.logger.info("Auto rebalance is not supported with the current ceph version")
113115
return 1
@@ -119,6 +121,7 @@ def rebalance_logic(self, request, context) -> int:
119121
ongoing_scale_down_rebalance = True
120122
self.logger.info(f"Scale-down rebalance is ongoing for ANA group {ana_grp} "
121123
f"current load {self.gw_srv.ana_grp_ns_load[ana_grp]}")
124+
self.last_scale_down_ts = now
122125
break
123126
num_active_ana_groups = len(grps_list)
124127
for ana_grp in self.gw_srv.ana_grp_state:
@@ -144,8 +147,11 @@ def rebalance_logic(self, request, context) -> int:
144147
f"GW still appears Optimized")
145148
return 1
146149
else:
147-
if not ongoing_scale_down_rebalance and \
148-
(self.gw_srv.ana_grp_state[worker_ana_group] == pb2.ana_state.OPTIMIZED):
150+
# keep hysteresis interval between scale-down and regular rebalance
151+
hysteresis = 2.5 * self.rebalance_period_sec
152+
if not ongoing_scale_down_rebalance \
153+
and ((now - self.last_scale_down_ts) > hysteresis) \
154+
and (self.gw_srv.ana_grp_state[worker_ana_group] == pb2.ana_state.OPTIMIZED):
149155
# if my optimized ana group == worker-ana-group or worker-ana-group is
150156
# also in optimized state on this GW machine
151157

@@ -182,6 +188,17 @@ def rebalance_logic(self, request, context) -> int:
182188
f"{min_ana_grp}, load {min_load} does not "
183189
f"fit rebalance criteria!")
184190
continue
191+
if ongoing_scale_down_rebalance and (num_active_ana_groups == self.ceph_utils.num_gws):
192+
# this GW feels scale_down condition on ana_grp but no GW in Deleting
193+
# state in the current mon.map. Experimental code - just for logs
194+
self.logger.info(f"Seems like scale-down deadlock on group {ana_grp}")
195+
if (self.gw_srv.ana_grp_state[worker_ana_group]) == pb2.ana_state.OPTIMIZED:
196+
min_ana_grp, chosen_nqn = self.find_min_loaded_group(grps_list)
197+
if chosen_nqn != "null":
198+
self.logger.info(f"Start rebalance (deadlock resolving) dest. ana group"
199+
f" {min_ana_grp}, subsystem {chosen_nqn}")
200+
# self.ns_rebalance(context, ana_grp, min_ana_grp, 1, "0")
201+
return 0
185202
return 1
186203

187204
def ns_rebalance(self, context, ana_id, dest_ana_id, num, subs_nqn) -> int:
@@ -203,7 +220,7 @@ def ns_rebalance(self, context, ana_id, dest_ana_id, num, subs_nqn) -> int:
203220
self.logger.debug(f"ret namespace_change_load_balancing_group {ret}")
204221
num_rebalanced += 1
205222
if num_rebalanced >= num:
206-
self.logger.info(f"== Completed rebalance in {time.time() - now } sec for "
223+
self.logger.info(f"== Completed rebalance in {time.time() - now} sec for "
207224
f"{num} namespaces from anagrp {ana_id} to {dest_ana_id} ")
208225
return 0
209226
return 0

0 commit comments

Comments
 (0)