@@ -30,6 +30,7 @@ def __init__(self, gateway_service):
             "gateway",
             "max_ns_to_change_lb_grp",
             8)
+        self.last_scale_down_ts = time.time()
         self.rebalance_event = threading.Event()
         self.logger.info(f"Starting rebalance thread: period: {self.rebalance_period_sec},"
                          f" max number ns to move: {self.rebalance_max_ns_to_change_lb_grp}")
@@ -102,12 +103,13 @@ def find_min_loaded_group_in_subsys(self, nqn, grp_list) -> int:
     # and reballance results will be accurate. Monitor in nvme-gw show response publishes the
     # index of ANA group that is currently responsible for rebalance
     def rebalance_logic(self, request, context) -> int:
+        now = time.time()
         worker_ana_group = self.ceph_utils.get_rebalance_ana_group()
         self.logger.debug(f"Called rebalance logic: current rebalancing ana "
                           f"group {worker_ana_group}")
         ongoing_scale_down_rebalance = False
         grps_list = self.ceph_utils.get_number_created_gateways(self.gw_srv.gateway_pool,
-                                                                self.gw_srv.gateway_group)
+                                                                self.gw_srv.gateway_group, False)
         if not self.ceph_utils.is_rebalance_supported():
             self.logger.info("Auto rebalance is not supported with the curent ceph version")
             return 1
@@ -119,6 +121,7 @@ def rebalance_logic(self, request, context) -> int:
                 ongoing_scale_down_rebalance = True
                 self.logger.info(f"Scale-down rebalance is ongoing for ANA group {ana_grp} "
                                  f"current load {self.gw_srv.ana_grp_ns_load[ana_grp]}")
+                self.last_scale_down_ts = now
                 break
         num_active_ana_groups = len(grps_list)
         for ana_grp in self.gw_srv.ana_grp_state:
@@ -144,8 +147,11 @@ def rebalance_logic(self, request, context) -> int:
                                      f"GW still appears Optimized")
                     return 1
             else:
-                if not ongoing_scale_down_rebalance and \
-                        (self.gw_srv.ana_grp_state[worker_ana_group] == pb2.ana_state.OPTIMIZED):
+                # keep hysteresis interval between scale-down and regular rebalance
+                hysteresis = 2.5 * self.rebalance_period_sec
+                if not ongoing_scale_down_rebalance \
+                        and ((now - self.last_scale_down_ts) > hysteresis) \
+                        and (self.gw_srv.ana_grp_state[worker_ana_group] == pb2.ana_state.OPTIMIZED):
                     # if my optimized ana group == worker-ana-group or worker-ana-group is
                     # also in optimized state on this GW machine
@@ -182,6 +188,17 @@ def rebalance_logic(self, request, context) -> int:
                                         f"{min_ana_grp}, load {min_load} does not "
                                         f"fit rebalance criteria!")
                        continue
+            if ongoing_scale_down_rebalance and (num_active_ana_groups == self.ceph_utils.num_gws):
+                # this GW feels scale_down condition on ana_grp but no GW in Deleting
+                # state in the current mon.map. Experimental code - just for logs
+                self.logger.info(f"Seems like scale-down deadlock on group {ana_grp}")
+                if (self.gw_srv.ana_grp_state[worker_ana_group]) == pb2.ana_state.OPTIMIZED:
+                    min_ana_grp, chosen_nqn = self.find_min_loaded_group(grps_list)
+                    if chosen_nqn != "null":
+                        self.logger.info(f"Start rebalance (deadlock resolving) dest. ana group"
+                                         f" {min_ana_grp}, subsystem {chosen_nqn}")
+                        # self.ns_rebalance(context, ana_grp, min_ana_grp, 1, "0")
+                        return 0
         return 1

     def ns_rebalance(self, context, ana_id, dest_ana_id, num, subs_nqn) -> int:
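This hunk adds a log-only detector for a suspected scale-down deadlock: the gateway still sees a scale-down load condition on an ANA group, yet the monitor map reports as many active ANA groups as gateways, so no gateway appears to be in the Deleting state; the actual namespace move stays commented out. Below is a hypothetical, self-contained sketch of just that condition — the helper name and parameters are stand-ins for the values the gateway reads from the monitor map.

```python
def scale_down_deadlock_suspected(ongoing_scale_down_rebalance: bool,
                                  num_active_ana_groups: int,
                                  num_gws: int) -> bool:
    # A scale-down load pattern is observed locally, yet the monitor map
    # reports as many active ANA groups as gateways, i.e. no gateway is
    # currently marked Deleting to absorb the pending moves.
    return ongoing_scale_down_rebalance and num_active_ana_groups == num_gws


print(scale_down_deadlock_suspected(True, 4, 4))   # True: suspected deadlock, log it
print(scale_down_deadlock_suspected(True, 5, 4))   # False when the counts differ
```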
@@ -203,7 +220,7 @@ def ns_rebalance(self, context, ana_id, dest_ana_id, num, subs_nqn) -> int:
                 self.logger.debug(f"ret namespace_change_load_balancing_group {ret}")
                 num_rebalanced += 1
                 if num_rebalanced >= num:
-                    self.logger.info(f"== Completed rebalance in {time.time() - now} sec for "
+                    self.logger.info(f"== Completed rebalance in {time.time() - now} sec for "
                                      f"{num} namespaces from anagrp {ana_id} to {dest_ana_id}")
                     return 0
         return 0