@@ -133,7 +133,8 @@ validate_all_active() {
 NUM_SUBSYSTEMS=2
 NUM_GATEWAYS=4
 FAILING_GATEWAYS=2
-NUM_OPTIMIZED=1
+NUM_OPTIMIZED_FAILOVER=2
+NUM_OPTIMIZED_REBALANCE=1
 #
 # Step 1 validate all gateways are optimized for one of the ANA groups
 # and all groups are unique
@@ -156,11 +157,14 @@ for i in $(seq 0 $(expr $FAILING_GATEWAYS - 1)); do
     echo 📫 nvme-gw delete gateway: \'$gw_name\' pool: \'$POOL\', group: \'\' \(empty string\)
     docker compose exec -T ceph ceph nvme-gw delete $gw_name $POOL ' '
 done
-sleep 100 # wait for scale-down rebalance to complete
+
 docker ps
 
-# expect remaining gws to have 1 optimized group each because
-# due to scale-down rebalance the 2 deleted gws and their 2 ANA groups were removed from the monitor's database
+# array to track PIDs of all top-level background tasks
+pids=()
+
+# expect remaining gws to have two optimized groups each initially,
+# until rebalance kicks in and we expect a single optimized group
 for i in $(seq 4); do
     found=0
     for j in $(seq 0 $(expr $FAILING_GATEWAYS - 1)); do
@@ -173,13 +177,53 @@ for i in $(seq 4); do
 
     # if gw is a healthy one
     if [ "$found" -eq "0" ]; then
-        echo "ℹ️ Check healthy gw gw=$i"
-        for s in $(seq $NUM_SUBSYSTEMS); do
-            NQN="nqn.2016-06.io.spdk:cnode$s"
-            GW_OPTIMIZED=$(expect_optimized "$(gw_name $i)" "$NUM_OPTIMIZED" "$NQN")
-        done
+        echo "ℹ️ Check healthy gw gw=$i"
+
+        (
+            subsystem_pids=()   # array to track PIDs for subsystem checks
+            subsystem_info=()   # array to track subsystem identifiers
+            for s in $(seq $NUM_SUBSYSTEMS); do
+                (
+                    NQN="nqn.2016-06.io.spdk:cnode$s"
+                    GW_OPTIMIZED=$(expect_optimized "$(gw_name $i)" "$NUM_OPTIMIZED_FAILOVER" "$NQN")
+                    echo "✅ failover gw gw=$i nqn=$NQN"
+                    GW_OPTIMIZED=$(expect_optimized "$(gw_name $i)" "$NUM_OPTIMIZED_REBALANCE" "$NQN")
+                    echo "✅ rebalance gw gw=$i nqn=$NQN"
+                ) &
+                subsystem_pids+=($!)                      # track PID for this subsystem task
+                subsystem_info+=("gw=$i subsystem=$s")    # track subsystem info for logging
+            done
+
+            # wait for all subsystem tasks and check their exit statuses
+            for idx in "${!subsystem_pids[@]}"; do
+                pid=${subsystem_pids[$idx]}
+                info=${subsystem_info[$idx]}
+                wait "$pid" || {
+                    echo "❌ subsystem task failed: $info" >&2
+                    exit 1   # fail the parent task for this gateway if any subsystem fails
+                }
+            done
+            echo "✅ failover rebalance gw=$i all subsystems"
+        ) &
+        pids+=($!)   # track PID for this gateway's checks
     fi
 done
+
+# wait for all top-level gateway tasks and check their exit statuses
+success=true
+for pid in "${pids[@]}"; do
+    wait "$pid" || {
+        echo "❌ gateway task failed." >&2
+        success=false
+    }
+done
+
+if $success; then
+    echo "✅ all gateway and subsystem checks completed successfully."
+else
+    echo "❌ one or more gateway tasks failed." >&2
+    exit 1
+fi
 
 #
 # Step 3 failback
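
Note on the dropped sleep 100: the updated flow appears to rely on expect_optimized (defined elsewhere in this script, outside the hunks above) to block until a gateway reports the expected number of optimized ANA groups for a subsystem -- first $NUM_OPTIMIZED_FAILOVER right after the deletions, then $NUM_OPTIMIZED_REBALANCE once rebalance settles. A minimal sketch of that polling pattern is shown below; query_optimized_count is a hypothetical placeholder for however the script actually reads a gateway's per-subsystem ANA states, and the retry count and interval are illustrative only, not the real helper's values.

expect_optimized() {
    local gw=$1 expected=$2 nqn=$3
    local tries=60 count=-1
    for ((t = 0; t < tries; t++)); do
        # hypothetical placeholder: prints how many ANA groups the gateway
        # currently reports as optimized for the given subsystem NQN
        count=$(query_optimized_count "$gw" "$nqn")
        if [ "$count" -eq "$expected" ]; then
            echo "$count"   # callers capture this into GW_OPTIMIZED
            return 0
        fi
        sleep 5
    done
    echo "❌ $gw: expected $expected optimized group(s) for $nqn, got $count" >&2
    return 1
}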