From fa0bd5a24b0839e31e2b0b73c750ddf5e1628e64 Mon Sep 17 00:00:00 2001
From: Leonid Chernin
Date: Tue, 18 Mar 2025 13:35:21 +0200
Subject: [PATCH] new github re-deploy test

Signed-off-by: Leonid Chernin
---
Review notes (free text in this position is ignored by git am):
* tests/ha/redeploy.sh: verify_number_active_groups now sends its diagnostics
  to stderr and prints only GW-epoch on stdout. Previously $(...) captured the
  state dump and status messages as well, so the epoch comparisons could not
  be evaluated reliably and did not fail the test.
* The line structure of this patch was restored from a whitespace-collapsed
  copy. Blank context lines in .env and the YAML indentation are assumptions,
  and the blob id of redeploy.sh is stale - regenerate before applying.
* .env pins CEPH_BRANCH/CEPH_SHA to a wip branch - confirm this is intended.

 .env                                  |  4 +-
 .github/workflows/build-container.yml |  2 +-
 tests/ha/redeploy.sh                  | 95 +++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 tests/ha/redeploy.sh

diff --git a/.env b/.env
index 92496dd44af..13924fda51e 100644
--- a/.env
+++ b/.env
@@ -65,8 +65,8 @@ SPDK_CENTOS_REPO_VER="9.0-21.el9"
 
 # Ceph Cluster
 CEPH_CLUSTER_VERSION="${CEPH_VERSION}"
-CEPH_BRANCH=main
-CEPH_SHA=latest
+CEPH_BRANCH=wip-leonidc-1803-redeploy-fix
+CEPH_SHA=2f2b7874cfe8af46cd4f61c9133da8b93790c884
 CEPH_DEVEL_MGR_PATH=../ceph
 
 
diff --git a/.github/workflows/build-container.yml b/.github/workflows/build-container.yml
index e06fa40432a..9827a57a57d 100644
--- a/.github/workflows/build-container.yml
+++ b/.github/workflows/build-container.yml
@@ -491,7 +491,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        test: ["sanity", "no_huge", "ns_lb_change", "no_subsystems", "auto_load_balance", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit", "cluster_pool", "flat_bdev_per_cluster", "set_qos", "set_qos_2ms"]
+        test: ["sanity", "no_huge", "ns_lb_change", "redeploy", "no_subsystems", "auto_load_balance", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit", "cluster_pool", "flat_bdev_per_cluster", "set_qos", "set_qos_2ms"]
     runs-on: ubuntu-latest
     env:
       HUGEPAGES: 1024 # 4 spdk instances
diff --git a/tests/ha/redeploy.sh b/tests/ha/redeploy.sh
new file mode 100644
index 00000000000..91215e358e7
--- /dev/null
+++ b/tests/ha/redeploy.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# HA test: restart ("redeploy") NVMe-oF gateway containers and verify that
+# "ceph nvme-gw show" keeps reporting the expected number of ACTIVE ANA
+# states while GW-epoch grows by no more than a small bound.
+#
+# Requires docker (with the compose plugin) and jq.
+set -euxo pipefail
+
+# Print an error message to stderr and abort the test.
+fail() {
+  printf 'Error!: %s\n' "$*" >&2
+  exit 1
+}
+
+# Print the IDs of running containers whose name matches "nvmeof" and $1.
+find_gateway_ids() {
+  docker ps --format '{{.ID}}\t{{.Names}}' \
+    | awk -v idx="$1" '$2 ~ /nvmeof/ && $2 ~ idx {print $1}'
+}
+
+# Stop and immediately start the given containers (simulated fast reboot).
+restart_gateway() {
+  docker stop "$@"
+  docker start "$@"
+}
+
+# Check that $1 lines of the gateways' "ana states" output contain " ACTIVE".
+# Diagnostics go to stderr; stdout carries only the current GW-epoch so that
+# callers can capture it with $(...). Exits non-zero on any failure.
+verify_number_active_groups() {
+  local expected=$1
+  local json states active epoch
+
+  json=$(docker compose exec -T ceph ceph nvme-gw show rbd '') \
+    || fail "ceph nvme-gw show failed"
+  states=$(jq -r '.["Created Gateways:"][] | ."ana states"' <<<"$json") \
+    || fail "cannot parse ceph nvme-gw show output"
+  printf '%s\n' "$states" >&2
+
+  # grep -c exits 1 when nothing matches; that case is handled just below.
+  active=$(grep -c ' ACTIVE' <<<"$states" || true)
+  if [[ "$active" -ne "$expected" ]]; then
+    fail "wrong number of Active ANA groups found $active"
+  fi
+  echo "Correct number of Active ANA groups found $active" >&2
+
+  epoch=$(jq -r '."GW-epoch"' <<<"$json") || fail "cannot read GW-epoch"
+  [[ "$epoch" =~ ^[0-9]+$ ]] || fail "unexpected GW-epoch value: $epoch"
+  printf '%s\n' "$epoch"
+}
+
+# Fail when GW-epoch grew by more than $4 between readings $2 and $3.
+check_epoch_growth() {
+  local phase=$1 before=$2 after=$3 limit=$4
+  if (( after - before > limit )); then
+    fail "$phase: GW-epoch grew from $before to $after (limit $limit)"
+  fi
+}
+
+gw1_ids=$(find_gateway_ids 1)
+gw2_ids=$(find_gateway_ids 2)
+[[ -n "$gw1_ids" ]] || fail "no running nvmeof container matching 1"
+[[ -n "$gw2_ids" ]] || fail "no running nvmeof container matching 2"
+mapfile -t GW1_IDS <<<"$gw1_ids"
+mapfile -t GW2_IDS <<<"$gw2_ids"
+
+echo "ℹ️ ℹ️ Start test: Redeploy test - simulate fast reboot and verify that no failovers during 12 sec"
+
+sleep 10
+epoch0=$(verify_number_active_groups 2)
+
+# Simulate a fast reboot of the first gateway.
+restart_gateway "${GW1_IDS[@]}"
+sleep 16
+# The ACTIVE count must be unchanged - taken to mean no failover happened.
+epoch1=$(verify_number_active_groups 2)
+
+sleep 8
+epoch2=$(verify_number_active_groups 2)
+check_epoch_growth "single gateway restart" "$epoch0" "$epoch1" 3
+
+# Now redeploy all gateways.
+restart_gateway "${GW1_IDS[@]}"
+sleep 1
+restart_gateway "${GW2_IDS[@]}"
+
+sleep 16
+epoch3=$(verify_number_active_groups 2)
+check_epoch_growth "restart of all gateways" "$epoch2" "$epoch3" 5
+
+sleep 8
+verify_number_active_groups 2
+
+echo "ℹ️ ℹ️ test passed"