Commit: minor fix
chriscchien committed Sep 25, 2024
1 parent e0353ac commit caef9ab
Showing 6 changed files with 45 additions and 64 deletions.
18 changes: 8 additions & 10 deletions e2e/keywords/k8s.resource
@@ -8,7 +8,7 @@ Library    ../libs/keywords/host_keywords.py
Library    ../libs/keywords/node_keywords.py

*** Variables ***

${DRAIN_TIMEOUT}    90

*** Keywords ***
Stop volume node kubelet of ${workload_kind} ${workload_id} for ${duration} seconds
@@ -87,25 +87,23 @@ Check node ${node_id} cordoned
    ${node_name} =    get_node_by_index    ${node_id}
    check_node_cordoned    ${node_name}

Force drain node ${node_id} and wait for ${duration} second
    [Arguments]    ${dataEngine}
Force drain node ${node_id}
    ${duration} =    90
    ${drained_node} =    get_node_by_index    ${node_id}
    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}    ${dataEngine}
    ${drain_process} =    force_drain_node_and_wait    ${drained_node}    ${duration}
    Set Test Variable    ${drain_process}
    Set Test Variable    ${instance_manager_name}
    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}
    ${drain_logs}    ${completed} =    force_drain_node_with_timeout    ${drained_node}    ${DRAIN_TIMEOUT}
    Set Test Variable    ${drain_logs}
    Set Test Variable    ${completed}
    Set Test Variable    ${drained_node}

The drain process not completed
    check_drain_process_not_completed    ${drain_process}
    Should Be Equal    ${completed}    ${False}

The drain process completed
    wait_for_all_pods_evicted    ${drained_node}
    check_drain_process_completed    ${drain_process}

Drain logs should contain
    [Arguments]    ${log}
    ${drain_logs} =    get_drain_process_error_log    ${drain_process}
    Should Contain    ${drain_logs}    ${log}

Check PDB not exist
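For orientation, a minimal Python sketch (not part of the diff above) of the flow these reworked keywords now express: the drain outcome is captured once as a log string plus a completion flag, and the later assertion keywords only inspect those two values instead of a live process handle. Here drain_node is a hypothetical stand-in for force_drain_node_with_timeout, and the eviction-error prefix mirrors the string the "Drain logs should contain" step checks for.

DRAIN_TIMEOUT = 90  # mirrors ${DRAIN_TIMEOUT} in k8s.resource

def blocked_drain_scenario(drain_node, drained_node, instance_manager_name):
    # drain_node(node, timeout) -> (log, completed), like force_drain_node_with_timeout.
    drain_logs, completed = drain_node(drained_node, DRAIN_TIMEOUT)
    # "The drain process not completed" is now a plain flag check ...
    assert completed is False
    # ... and "Drain logs should contain" is a substring check on kubectl's stderr.
    assert f'error when evicting pods/"{instance_manager_name}' in drain_logs
    return drain_logs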
25 changes: 11 additions & 14 deletions e2e/libs/k8s/k8s.py
@@ -10,9 +10,8 @@
from utility.utility import subprocess_exec_cmd
from utility.utility import logging
from utility.utility import get_retry_count_and_interval
from utility.utility import check_popen_process_not_completed
from utility.utility import check_popen_process_completed
from utility.utility import get_popen_process_error_log
from utility.utility import subprocess_run_cmd
from robot.libraries.BuiltIn import BuiltIn

async def restart_kubelet(node_name, downtime_in_sec=10):
    manifest = new_pod_manifest(
@@ -81,7 +80,12 @@ def check_node_cordoned(node_name):
    node = api.read_node(node_name)
    assert node.spec.unschedulable is True, f"node {node_name} is not cordoned."

def force_drain_node_and_wait(node_name, duration):
def force_drain_node_with_timeout(node_name, timeout):
    exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data", f"--timeout={int(timeout)}s"]
    log, completed = subprocess_run_cmd(exec_cmd)
    return log, completed

'''
    _, retry_interval = get_retry_count_and_interval()
    exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
    drain_process = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -93,17 +97,10 @@ def force_drain_node_and_wait(node_name, duration):
        time.sleep(retry_interval)
    return drain_process
'''
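The rewritten helper leans on kubectl's own --timeout flag rather than polling a background process: kubectl drain keeps retrying evictions that a PodDisruptionBudget blocks, writes "error when evicting pods/..." lines to stderr, and exits non-zero once the timeout elapses. A minimal sketch of that behaviour, assuming kubectl is on the PATH and using a hypothetical node name:

import subprocess

cmd = [
    "kubectl", "drain", "worker-2",  # "worker-2" is a hypothetical node name
    "--force", "--ignore-daemonsets", "--delete-emptydir-data",
    "--timeout=90s",
]
res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
completed = res.returncode == 0
if not completed:
    # The eviction errors land on stderr, which is what the Robot keyword
    # "Drain logs should contain" searches through.
    print(res.stderr.decode("utf-8"))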

def check_drain_process_not_completed(drain_process):
    check_popen_process_not_completed(drain_process)

def check_drain_process_completed(drain_process):
    check_popen_process_completed(drain_process)

def get_drain_process_error_log(drain_process):
    return get_popen_process_error_log(drain_process)

def get_instance_manager_on_node(node_name, data_engine):
def get_instance_manager_on_node(node_name):
    data_engine = BuiltIn().get_variable_value("${DATA_ENGINE}")
    pods = get_all_pods_on_node(node_name)
    for pod in pods:
        labels = pod.metadata.labels
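A note on get_instance_manager_on_node: instead of threading data_engine through every keyword, it now reads the Robot variable ${DATA_ENGINE} through BuiltIn. A small hedged sketch of that pattern (the fallback value here is an assumption for illustration, not something the commit sets):

from robot.libraries.BuiltIn import BuiltIn

def get_data_engine(default="v1"):
    # get_variable_value returns None when ${DATA_ENGINE} is not set in the
    # current Robot run, so an explicit fallback avoids passing None onward.
    value = BuiltIn().get_variable_value("${DATA_ENGINE}")
    return value if value is not None else default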
16 changes: 5 additions & 11 deletions e2e/libs/keywords/k8s_keywords.py
@@ -7,12 +7,9 @@
from k8s.k8s import wait_all_pods_evicted
from k8s.k8s import get_all_pods_on_node
from k8s.k8s import check_node_cordoned
from k8s.k8s import force_drain_node_and_wait
from k8s.k8s import check_drain_process_not_completed
from k8s.k8s import force_drain_node_with_timeout
from k8s.k8s import get_instance_manager_on_node
from k8s.k8s import get_drain_process_error_log
from k8s.k8s import check_instance_manager_pdb_not_exist
from k8s.k8s import check_drain_process_completed
from utility.utility import logging
from node import Node

@@ -76,17 +73,14 @@ def get_all_pods_on_node(self, node_name):
    def check_node_cordoned(self, node_name):
        check_node_cordoned(node_name)

    def force_drain_node_and_wait(self, node_name, duration):
        return force_drain_node_and_wait(node_name, duration)
    def force_drain_node_with_timeout(self, node_name, duration):
        return force_drain_node_with_timeout(node_name, duration)

    def check_drain_process_not_completed(self, drain_process):
        return check_drain_process_not_completed(drain_process)

    def check_drain_process_completed(self, drain_process):
        return check_drain_process_completed(drain_process)

    def get_instance_manager_on_node(self, node_name, data_engine):
        return get_instance_manager_on_node(node_name, data_engine)
    def get_instance_manager_on_node(self, node_name):
        return get_instance_manager_on_node(node_name)

    def get_drain_process_error_log(self, drain_process):
        return get_drain_process_error_log(drain_process)
36 changes: 14 additions & 22 deletions e2e/libs/utility/utility.py
@@ -87,6 +87,20 @@ def subprocess_exec_cmd(cmd):
    return res


def subprocess_run_cmd(cmd):
    # Unlike subprocess_exec_cmd, a non-zero exit code is not fatal here: the
    # caller gets the captured error log and completed=False instead. A kubectl
    # drain that hits its own --timeout exits non-zero and lands in the except
    # branch, so no subprocess-level timeout is needed.
    try:
        res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        log = res.stdout.decode('utf-8')
        logging(f"Executed command {cmd} completed and result= {log}")
        completed = True
    except subprocess.CalledProcessError as e:
        log = e.stderr.decode('utf-8')
        logging(f"Executed command {cmd} did not complete and log= {log}")
        completed = False
    return log, completed
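A short usage sketch for the new helper (an illustration assuming kubectl is on the PATH, not code from the repository): any command that exits non-zero yields completed=False with its stderr as the log, which is exactly the shape force_drain_node_with_timeout passes back to the Robot layer.

# "no-such-resource" is an intentionally invalid resource type, so kubectl
# exits non-zero and the helper reports completed=False with the stderr text.
log, completed = subprocess_run_cmd(["kubectl", "get", "no-such-resource"])
if not completed:
    logging(f"command failed as expected, stderr:\n{log}")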


def wait_for_cluster_ready():
    core_api = client.CoreV1Api()
    retry_count, retry_interval = get_retry_count_and_interval()
@@ -285,25 +299,3 @@ def get_name_suffix(*args):
        if arg:
            suffix += f"-{arg}"
    return suffix


def check_popen_process_not_completed(process):
    assert process.poll() is None, f"process {process} terminated which is not expected."


def check_popen_process_completed(process):
    retry_count, retry_interval = get_retry_count_and_interval()
    for i in range(retry_count):
        if process.poll() is not None:
            return
        time.sleep(retry_interval)
    assert process.poll() is not None, f"process {process} not terminated which is not expected."


def get_popen_process_error_log(drain_process):
    if drain_process.poll() is None:
        drain_process.terminate()

    stdout, stderr = drain_process.communicate()
    logging(f"{stderr.decode('utf-8')}")
    return stderr.decode('utf-8')
2 changes: 1 addition & 1 deletion e2e/requirements.txt
@@ -6,4 +6,4 @@ kubernetes==27.2.0
requests==2.32.3
boto3==1.35.19
pyyaml==6.0.2
minio==5.0.10
minio==5.0.10
12 changes: 6 additions & 6 deletions e2e/tests/negative/node_drain.robot
@@ -182,22 +182,22 @@ Stopped replicas on deleted nodes should not be counted as healthy replicas when
    [Teardown]    Cleanup test resources include off nodes
    Given Disable node 0 scheduling
    And Set setting node-drain-policy to block-if-contains-last-replica
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2    dataEngine=${DATA_ENGINE}
    And Attach volume 0 to node 1
    And Wait for volume 0 healthy
    And Write data to volume 0
    And Detach volume 0 from attached node
    And Wait for volume 0 detached
    And Power off node 1

    When Force drain node 2 and wait for 90 second    dataEngine=${DATA_ENGINE}
    When Force drain node 2
    And The drain process not completed
    And Check instance-manager pod is running on node 2    dataEngine=${DATA_ENGINE}
    And Drain logs should contain    log=error when evicting pods/\"${instance_manager_name}
    And Check volume 0 replica on node 2 exist

Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    [Documentation]    Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    [Documentation]    Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    ...    Related Issue:
    ...    - https://github.com/longhorn/longhorn/issues/2237
    ...
@@ -217,17 +217,17 @@ Setting Allow Node Drain with the Last Healthy Replica protects the last healthy
    [Teardown]    Cleanup test resources include off nodes
    Given Disable node 0 scheduling
    And Set setting node-drain-policy to block-if-contains-last-replica
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2    dataEngine=${DATA_ENGINE}
    And Attach volume 0 to node 1
    And Wait for volume 0 healthy
    And Write data to volume 0
    And Detach volume 0 from attached node
    And Wait for volume 0 detached
    And Power off node 1

    When Force drain node 2 and wait for 90 second    dataEngine=${DATA_ENGINE}
    When Force drain node 2
    And The drain process not completed
    And Check instance-manager pod is running on node 2    dataEngine=${DATA_ENGINE}
    And Check instance-manager pod is running on node 2

    When Set setting node-drain-policy to always-allow
    And The drain process completed
