Commit 9c6dc80 ("minor fix")
chriscchien committed Sep 26, 2024
1 parent 6a59971
Showing 5 changed files with 31 additions and 44 deletions.
27 changes: 18 additions & 9 deletions e2e/keywords/k8s.resource
@@ -8,7 +8,7 @@ Library ../libs/keywords/host_keywords.py
Library ../libs/keywords/node_keywords.py

*** Variables ***

${DRAIN_TIMEOUT} 90

*** Keywords ***
Stop volume node kubelet of ${workload_kind} ${workload_id} for ${duration} seconds
@@ -87,26 +87,35 @@ Check node ${node_id} cordoned
${node_name} = get_node_by_index ${node_id}
check_node_cordoned ${node_name}

Force drain node ${node_id} and wait for ${duration} second
Force drain node ${node_id} and expect failure
${drained_node} = get_node_by_index ${node_id}
${instance_manager_name} = get_instance_manager_on_node ${drained_node}
Run Keyword And Expect Error * force drain node ${drained_node}
Set Test Variable ${instance_manager_name}
Set Test Variable ${drained_node}

Force drain node ${node_id} and expect success
${drained_node} = get_node_by_index ${node_id}
${instance_manager_name} = get_instance_manager_on_node ${drained_node}
${drain_process} = force_drain_node_and_wait ${drained_node} ${duration}
Set Test Variable ${drain_process}
force drain node ${drained_node}
Set Test Variable ${instance_manager_name}
Set Test Variable ${drained_node}

#Force drain node ${node_id} and wait for ${duration} second
# ${drained_node} = get_node_by_index ${node_id}
# ${instance_manager_name} = get_instance_manager_on_node ${drained_node}
# ${drain_process} = force_drain_node_and_wait ${drained_node} ${duration}
# Set Test Variable ${drain_process}
# Set Test Variable ${instance_manager_name}
# Set Test Variable ${drained_node}

The drain process not completed
check_drain_process_not_completed ${drain_process}

The drain process completed
wait_for_all_pods_evicted ${drained_node}
check_drain_process_completed ${drain_process}

Drain logs should contain
[Arguments] ${log}
${drain_logs} = get_drain_process_error_log ${drain_process}
Should Contain ${drain_logs} ${log}

Check PDB not exist
[Arguments] ${instance_manager}
check_instance_manager_pdb_not_exist ${instance_manager}
22 changes: 3 additions & 19 deletions e2e/libs/k8s/k8s.py
@@ -12,7 +12,7 @@
from utility.utility import get_retry_count_and_interval
from utility.utility import check_popen_process_not_completed
from utility.utility import check_popen_process_completed
from utility.utility import get_popen_process_error_log
from utility.utility import subprocess_exec_cmd_with_timeout
from robot.libraries.BuiltIn import BuiltIn

async def restart_kubelet(node_name, downtime_in_sec=10):
@@ -37,9 +37,9 @@ def drain_node(node_name):
exec_cmd = ["kubectl", "drain", node_name, "--ignore-daemonsets", "--delete-emptydir-data"]
res = subprocess_exec_cmd(exec_cmd)

def force_drain_node(node_name):
def force_drain_node(node_name, timeout):
exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
res = subprocess_exec_cmd(exec_cmd)
res = subprocess_exec_cmd_with_timeout(exec_cmd, timeout)

def cordon_node(node_name):
exec_cmd = ["kubectl", "cordon", node_name]
@@ -82,28 +82,12 @@ def check_node_cordoned(node_name):
node = api.read_node(node_name)
assert node.spec.unschedulable is True, f"node {node_name} is not cordoned."

def force_drain_node_and_wait(node_name, duration):
_, retry_interval = get_retry_count_and_interval()
exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
drain_process = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

for i in range(int(duration)):
logging(f"Performing {exec_cmd} and wait... counts={i}")
if drain_process.poll() is not None:
raise AssertionError(f"Drain node {node_name} completed, but it was expected not to complete!")
time.sleep(retry_interval)

return drain_process

def check_drain_process_not_completed(drain_process):
check_popen_process_not_completed(drain_process)

def check_drain_process_completed(drain_process):
check_popen_process_completed(drain_process)

def get_drain_process_error_log(drain_process):
return get_popen_process_error_log(drain_process)

def get_instance_manager_on_node(node_name):
data_engine = BuiltIn().get_variable_value("${DATA_ENGINE}")
pods = get_all_pods_on_node(node_name)
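With the timeout threaded through to kubectl, force_drain_node now fails on its own when eviction stays blocked (for example by an instance-manager PodDisruptionBudget), replacing the removed force_drain_node_and_wait polling loop. Below is a minimal sketch, not part of the commit, of the contract the Robot keywords rely on; the node name is illustrative and kubectl is assumed to be on the PATH:

import subprocess

def force_drain_node_sketch(node_name, timeout):
    # Same command the library builds; check_output raises TimeoutExpired when the
    # drain is still blocked once the timeout elapses, or CalledProcessError when
    # kubectl exits non-zero.
    cmd = ["kubectl", "drain", node_name, "--force",
           "--ignore-daemonsets", "--delete-emptydir-data"]
    return subprocess.check_output(cmd, timeout=timeout)

if __name__ == "__main__":
    try:
        force_drain_node_sketch("worker-2", timeout=90)  # hypothetical node name
        print("drain completed")                         # "... and expect success" path
    except subprocess.TimeoutExpired:
        print("drain did not complete in time")          # "... and expect failure" path
    except subprocess.CalledProcessError as err:
        print(f"drain failed: {err.output}")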
11 changes: 2 additions & 9 deletions e2e/libs/keywords/k8s_keywords.py
@@ -7,10 +7,8 @@
from k8s.k8s import wait_all_pods_evicted
from k8s.k8s import get_all_pods_on_node
from k8s.k8s import check_node_cordoned
from k8s.k8s import force_drain_node_and_wait
from k8s.k8s import check_drain_process_not_completed
from k8s.k8s import get_instance_manager_on_node
from k8s.k8s import get_drain_process_error_log
from k8s.k8s import check_instance_manager_pdb_not_exist
from k8s.k8s import check_drain_process_completed
from utility.utility import logging
@@ -53,7 +51,8 @@ def drain_node(self, node_name):
drain_node(node_name)

def force_drain_node(self, node_name):
force_drain_node(node_name)
timeout = int(BuiltIn().get_variable_value("${DRAIN_TIMEOUT}", default="90"))
force_drain_node(node_name, timeout)

def uncordon_node(self, node_name):
uncordon_node(node_name)
@@ -76,9 +75,6 @@ def get_all_pods_on_node(self, node_name):
def check_node_cordoned(self, node_name):
check_node_cordoned(node_name)

def force_drain_node_and_wait(self, node_name, duration):
return force_drain_node_and_wait(node_name, duration)

def check_drain_process_not_completed(self, drain_process):
return check_drain_process_not_completed(drain_process)

@@ -88,8 +84,5 @@ def check_drain_process_completed(self, drain_process):
def get_instance_manager_on_node(self, node_name):
return get_instance_manager_on_node(node_name)

def get_drain_process_error_log(self, drain_process):
return get_drain_process_error_log(drain_process)

def check_instance_manager_pdb_not_exist(self, instance_manager):
return check_instance_manager_pdb_not_exist(instance_manager)
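The keyword layer now resolves the drain timeout from the Robot variable table instead of taking a duration argument. A hedged sketch of that lookup pattern, assuming it runs inside an executing Robot suite where ${DRAIN_TIMEOUT} is defined in k8s.resource (outside of a test run, get_variable_value raises RobotNotRunningError):

from robot.libraries.BuiltIn import BuiltIn

def resolve_drain_timeout():
    # Falls back to "90" when the suite does not define ${DRAIN_TIMEOUT},
    # mirroring the default used by force_drain_node above.
    value = BuiltIn().get_variable_value("${DRAIN_TIMEOUT}", default="90")
    return int(value)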
4 changes: 4 additions & 0 deletions e2e/libs/utility/utility.py
@@ -86,6 +86,10 @@ def subprocess_exec_cmd(cmd):
logging(f"Executed command {cmd} with result {res}")
return res

def subprocess_exec_cmd_with_timeout(cmd, timeout):
res = subprocess.check_output(cmd, timeout=timeout)
logging(f"Executed command {cmd} with result {res}")
return res

def wait_for_cluster_ready():
core_api = client.CoreV1Api()
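One detail about the new helper: subprocess.check_output accepts the timeout only as a keyword argument, because it is declared after *popenargs; passed positionally the value would reach Popen as bufsize and no timeout would be enforced. A small standalone check of the keyword form, assuming a POSIX sleep binary is available:

import subprocess

try:
    # Sleeps longer than the timeout, so TimeoutExpired is raised after ~1 second.
    subprocess.check_output(["sleep", "5"], timeout=1)
except subprocess.TimeoutExpired as err:
    print(f"command timed out after {err.timeout}s, as expected")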
11 changes: 4 additions & 7 deletions e2e/tests/negative/node_drain.robot
@@ -190,10 +190,8 @@ Stopped replicas on deleted nodes should not be counted as healthy replicas when
And Wait for volume 0 detached
And Power off node 1

When Force drain node 2 and wait for 90 second
And The drain process not completed
And Check instance-manager pod is running on node 2
And Drain logs should contain log=error when evicting pods/\"${instance_manager_name}
When Force drain node 2 and expect failure
And Check instance-manager pod is running on node 2
And Check volume 0 replica on node 2 exist

Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
Expand Down Expand Up @@ -225,10 +223,9 @@ Setting Allow Node Drain with the Last Healthy Replica protects the last healthy
And Wait for volume 0 detached
And Power off node 1

When Force drain node 2 and wait for 90 second
And The drain process not completed
When Force drain node 2 and expect failure
And Check instance-manager pod is running on node 2

When Set setting node-drain-policy to always-allow
And The drain process completed
And Force drain node 2 and expect success
And Check PDB not exist instance_manager=${instance_manager_name}
