
Commit

Add test case test_drain_with_block_for_eviction_if_contains_last_replica_success

ref: 7521

Signed-off-by: Chris <chris.chien@suse.com>
chriscchien committed Feb 20, 2024
1 parent 6a084af commit 84de17d
Showing 1 changed file with 193 additions and 3 deletions.
196 changes: 193 additions & 3 deletions manager/integration/tests/test_node.py
@@ -53,6 +53,7 @@
from common import create_pv_for_volume
from common import create_pvc_for_volume, create_and_wait_deployment
from common import get_apps_api_client, write_pod_volume_random_data
from common import prepare_host_disk, wait_for_volume_degraded

from backupstore import set_random_backupstore # NOQA
from concurrent.futures import ThreadPoolExecutor, TimeoutError
@@ -2849,8 +2850,9 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
    assert expected_test_data_checksum == test_data_checksum


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success():
def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
                                                                         core_api, # NOQA
                                                                         make_deployment_with_pvc): # NOQA
"""
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2864,7 +2866,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
    4. Write data to the volumes.
    5. Drain a node both volumes have a replica scheduled to.
    6. While the drain is ongoing:
       - Verify that the volume with one replica never becomes degraded.
       - Verify that the volume with three replicas becomes degraded.
       - Verify that `node.status.autoEvicting == true`.
       - Optionally verify that `replica.spec.evictionRequested == true` on the
@@ -2880,6 +2881,195 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
    12. Verify that `replica.spec.evictionRequested == false` on all replicas.
    13. Verify the data in both volumes.
    """
    apps_api = get_apps_api_client()
    host_id = get_self_host_id()
    nodes = client.list_node()
    evict_nodes = [node for node in nodes if node.id != host_id][:2]
    evict_source_node = evict_nodes[0]

    # Create extra disk on current node
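    # (presumably so the replica evicted from evict_source_node has enough
    # disk space to be rescheduled on this node)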
    node = client.by_id_node(host_id)
    disks = node.disks

    disk_volume_name = 'vol-disk'
    disk_volume = client.create_volume(name=disk_volume_name,
                                       size=str(2 * Gi),
                                       numberOfReplicas=1,
                                       dataLocality="strict-local")
    disk_volume = wait_for_volume_detached(client, disk_volume_name)

    disk_volume.attach(hostId=host_id)
    disk_volume = wait_for_volume_healthy(client, disk_volume_name)
    disk_path = prepare_host_disk(get_volume_endpoint(disk_volume),
                                  disk_volume_name)
    disk = {"path": disk_path, "allowScheduling": True}

    update_disk = get_update_disks(disks)
    update_disk["disk1"] = disk

    node = update_node_disks(client, node.name, disks=update_disk, retry=True)
    node = wait_for_disk_update(client, host_id, len(update_disk))
    assert len(node.disks) == len(update_disk)

    # Step 1
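    # A drain should now only be blocked while the node still holds the last
    # replica of some volume.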
    setting = client.by_id_setting(
        SETTING_NODE_DRAIN_POLICY)
    client.update(setting, value="block-for-eviction-if-contains-last-replica")

    # Step 2, 3
    volume1_name = "vol-1"
    volume1 = client.create_volume(name=volume1_name,
                                   size=str(1 * Gi),
                                   numberOfReplicas=3)
    volume1 = common.wait_for_volume_detached(client, volume1_name)

    pvc1_name = volume1_name + "-pvc"
    create_pv_for_volume(client, core_api, volume1, volume1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)
    deployment1_name = volume1_name + "-dep"
    deployment1 = make_deployment_with_pvc(deployment1_name, pvc1_name)
    deployment1["spec"]["template"]["spec"]["nodeSelector"] \
        = {"kubernetes.io/hostname": host_id}

    create_and_wait_deployment(apps_api, deployment1)

    volume1 = wait_for_volume_healthy(client, volume1_name)
    # Keep only the volume 1 replica that is located on evict_source_node
    volume1.updateReplicaCount(replicaCount=1)
    for replica in volume1.replicas:
        if replica.hostId != evict_source_node.id:
            volume1.replicaRemove(name=replica.name)
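    # evict_source_node now holds volume 1's only replica, i.e. its last
    # replica.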

    # Volume 2: attached to the current node, with 3 replicas
    volume2_name = "vol-2"
    volume2 = client.create_volume(name=volume2_name,
                                   size=str(1 * Gi),
                                   numberOfReplicas=3)
    volume2 = common.wait_for_volume_detached(client, volume2_name)

    pvc2_name = volume2_name + "-pvc"
    create_pv_for_volume(client, core_api, volume2, volume2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)
    deployment2_name = volume2_name + "-dep"
    deployment2 = make_deployment_with_pvc(deployment2_name, pvc2_name)
    deployment2["spec"]["template"]["spec"]["nodeSelector"] \
        = {"kubernetes.io/hostname": host_id}

    create_and_wait_deployment(apps_api, deployment2)

    volume2_replicas = []
    volume2 = client.by_id_volume(volume2_name)
    for replica in volume2.replicas:
        volume2_replicas.append(replica.name)
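    # The recorded replica names are used in step 10 to verify that volume 2's
    # replicas were not rebuilt elsewhere.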

    # Step 4
    data_path = '/data/test'

    deployment1_pod_names = common.get_deployment_pod_names(core_api,
                                                            deployment1)
    write_pod_volume_random_data(core_api,
                                 deployment1_pod_names[0],
                                 data_path,
                                 DATA_SIZE_IN_MB_3)
    expected_test_data_checksum1 = get_pod_data_md5sum(core_api,
                                                       deployment1_pod_names[0], # NOQA
                                                       data_path)

    deployment2_pod_names = common.get_deployment_pod_names(core_api,
                                                            deployment2)
    write_pod_volume_random_data(core_api,
                                 deployment2_pod_names[0],
                                 data_path,
                                 DATA_SIZE_IN_MB_3)
    expected_test_data_checksum2 = get_pod_data_md5sum(core_api,
                                                       deployment2_pod_names[0], # NOQA
                                                       data_path)
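    # The checksums recorded here are compared again in step 13, after the
    # drain.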

    # Step 5
    executor = ThreadPoolExecutor(max_workers=5)
    future = executor.submit(drain_node, core_api, evict_source_node)
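    # The drain runs in a background thread so that step 6 can inspect the
    # cluster while it is still in progress.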

    # Step 6
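    # Only the replica on the drained node that is a volume's last copy
    # (volume 1's) should have evictionRequested set to true.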
    volume1 = client.by_id_volume(volume1_name)
    for replica in volume1.replicas:
        if replica.hostId == evict_source_node.id:
            replica_name = replica.name
            break

    replica_info = get_replica_detail(replica_name)
    eviction_requested = replica_info["spec"]["evictionRequested"]
    assert eviction_requested is True

    nodes = client.list_node()
    for node in nodes:
        if node.id == evict_source_node.id:
            assert node.autoEvicting is True

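    # Volume 2 is expected to become degraded while the node is drained, but
    # none of its replicas should be requested for eviction.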
    volume2 = wait_for_volume_degraded(client, volume2_name)

    for replica in volume2.replicas:
        replica_info = get_replica_detail(replica.name)
        eviction_requested = replica_info["spec"]["evictionRequested"]
        assert eviction_requested is False

    # Step 7
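    # Once the last replica has been evicted, the drain is expected to
    # complete within the timeout.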
    thread_timeout = 60
    try:
        future.result(timeout=thread_timeout)
        drain_complete = True
    except TimeoutError:
        print("drain node thread exceed timeout ({})s".format(thread_timeout))
        drain_complete = False
        future.cancel()
    finally:
        assert drain_complete is True

    # Step 8
    set_node_cordon(core_api, evict_source_node.id, False)

    # Step 9
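    # Volume 1 should still have exactly one replica, now on a node other than
    # the drained one.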
    volume1 = client.by_id_volume(volume1_name)
    assert len(volume1.replicas) == 1
    for replica in volume1.replicas:
        assert replica.hostId != evict_source_node.id

    # Step 10
    # Verify that volume 2's replicas were not moved, by checking the replica
    # names recorded before the node drain
    volume2 = wait_for_volume_healthy(client, volume2_name)
    for replica in volume2.replicas:
        assert replica.name in volume2_replicas

    # Step 11
    nodes = client.list_node()
    for node in nodes:
        if node.id == evict_source_node.id:
            assert node.autoEvicting is False

    # Step 12
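    # After the drain, no replica of either volume should still be flagged for
    # eviction.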
    volume1 = client.by_id_volume(volume1_name)
    for replica in volume1.replicas:
        replica_info = get_replica_detail(replica.name)
        eviction_requested = replica_info["spec"]["evictionRequested"]
        assert eviction_requested is False

    volume2 = client.by_id_volume(volume2_name)
    for replica in volume2.replicas:
        replica_info = get_replica_detail(replica.name)
        eviction_requested = replica_info["spec"]["evictionRequested"]
        assert eviction_requested is False

    # Step 13
    test_data_checksum1 = get_pod_data_md5sum(core_api,
                                              deployment1_pod_names[0],
                                              data_path)
    assert expected_test_data_checksum1 == test_data_checksum1

    test_data_checksum2 = get_pod_data_md5sum(core_api,
                                              deployment2_pod_names[0],
                                              data_path)
    assert expected_test_data_checksum2 == test_data_checksum2


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_failure():