diff --git a/e2e/keywords/k8s.resource b/e2e/keywords/k8s.resource
index d553daef4..a0519cb47 100644
--- a/e2e/keywords/k8s.resource
+++ b/e2e/keywords/k8s.resource
@@ -8,7 +8,7 @@ Library    ../libs/keywords/host_keywords.py
 Library    ../libs/keywords/node_keywords.py
 
 *** Variables ***
-
+${DRAIN_TIMEOUT}    90
 
 *** Keywords ***
 Stop volume node kubelet of ${workload_kind} ${workload_id} for ${duration} seconds
@@ -87,14 +87,28 @@ Check node ${node_id} cordoned
     ${node_name} =    get_node_by_index    ${node_id}
     check_node_cordoned    ${node_name}
 
-Force drain node ${node_id} and wait for ${duration} second
+Force drain node ${node_id} and expect failure
+    ${drained_node} =    get_node_by_index    ${node_id}
+    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}
+    Run Keyword And Expect Error    *    force drain node    ${drained_node}
+    Set Test Variable    ${instance_manager_name}
+    Set Test Variable    ${drained_node}
+
+Force drain node ${node_id} and expect success
     ${drained_node} =    get_node_by_index    ${node_id}
     ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}
-    ${drain_process} =    force_drain_node_and_wait    ${drained_node}    ${duration}
-    Set Test Variable    ${drain_process}
+    force drain node    ${drained_node}
     Set Test Variable    ${instance_manager_name}
     Set Test Variable    ${drained_node}
 
+#Force drain node ${node_id} and wait for ${duration} second
+#    ${drained_node} =    get_node_by_index    ${node_id}
+#    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}
+#    ${drain_process} =    force_drain_node_and_wait    ${drained_node}    ${duration}
+#    Set Test Variable    ${drain_process}
+#    Set Test Variable    ${instance_manager_name}
+#    Set Test Variable    ${drained_node}
+
 The drain process not completed
     check_drain_process_not_completed    ${drain_process}
 
@@ -102,11 +116,6 @@ The drain process completed
     wait_for_all_pods_evicted    ${drained_node}
     check_drain_process_completed    ${drain_process}
 
-Drain logs should contain
-    [Arguments]    ${log}
-    ${drain_logs} =    get_drain_process_error_log    ${drain_process}
-    Should Contain    ${drain_logs}    ${log}
-
 Check PDB not exist
     [Arguments]    ${instance_manger}
     check_instance_manager_pdb_not_exist    ${instance_manger}
diff --git a/e2e/libs/k8s/k8s.py b/e2e/libs/k8s/k8s.py
index 4c90d1d4e..bdaac74ea 100644
--- a/e2e/libs/k8s/k8s.py
+++ b/e2e/libs/k8s/k8s.py
@@ -12,7 +12,7 @@ from utility.utility import get_retry_count_and_interval
 from utility.utility import check_popen_process_not_completed
 from utility.utility import check_popen_process_completed
-from utility.utility import get_popen_process_error_log
+from utility.utility import subprocess_exec_cmd_with_timeout
 from robot.libraries.BuiltIn import BuiltIn
 
 async def restart_kubelet(node_name, downtime_in_sec=10):
@@ -37,9 +37,9 @@ def drain_node(node_name):
     exec_cmd = ["kubectl", "drain", node_name, "--ignore-daemonsets", "--delete-emptydir-data"]
     res = subprocess_exec_cmd(exec_cmd)
 
-def force_drain_node(node_name):
+def force_drain_node(node_name, timeout):
     exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
-    res = subprocess_exec_cmd(exec_cmd)
+    res = subprocess_exec_cmd_with_timeout(exec_cmd, timeout)
 
 def cordon_node(node_name):
     exec_cmd = ["kubectl", "cordon", node_name]
@@ -82,28 +82,12 @@ def check_node_cordoned(node_name):
     node = api.read_node(node_name)
     assert node.spec.unschedulable is True, f"node {node_name} is not cordoned."
 
-def force_drain_node_and_wait(node_name, duration):
-    _, retry_interval = get_retry_count_and_interval()
-    exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
-    drain_process = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-    for i in range(int(duration)):
-        logging(f"Performing {exec_cmd} and wait... counts={i}")
-        if drain_process.poll() is not None:
-            raise AssertionError(f"Drain node {node_name} completed, but it was expected not to complete!")
-        time.sleep(retry_interval)
-
-    return drain_process
-
 def check_drain_process_not_completed(drain_process):
     check_popen_process_not_completed(drain_process)
 
 def check_drain_process_completed(drain_process):
     check_popen_process_completed(drain_process)
 
-def get_drain_process_error_log(drain_process):
-    return get_popen_process_error_log(drain_process)
-
 def get_instance_manager_on_node(node_name):
     data_engine = BuiltIn().get_variable_value("${DATA_ENGINE}")
     pods = get_all_pods_on_node(node_name)
diff --git a/e2e/libs/keywords/k8s_keywords.py b/e2e/libs/keywords/k8s_keywords.py
index 85834dfe2..a3eb6a312 100644
--- a/e2e/libs/keywords/k8s_keywords.py
+++ b/e2e/libs/keywords/k8s_keywords.py
@@ -7,10 +7,8 @@ from k8s.k8s import wait_all_pods_evicted
 from k8s.k8s import get_all_pods_on_node
 from k8s.k8s import check_node_cordoned
-from k8s.k8s import force_drain_node_and_wait
 from k8s.k8s import check_drain_process_not_completed
 from k8s.k8s import get_instance_manager_on_node
-from k8s.k8s import get_drain_process_error_log
 from k8s.k8s import check_instance_manager_pdb_not_exist
 from k8s.k8s import check_drain_process_completed
 from utility.utility import logging
@@ -53,7 +51,8 @@ def drain_node(self, node_name):
         drain_node(node_name)
 
     def force_drain_node(self, node_name):
-        force_drain_node(node_name)
+        timeout = int(BuiltIn().get_variable_value("${DRAIN_TIMEOUT}", default="90"))
+        force_drain_node(node_name, timeout)
 
     def uncordon_node(self, node_name):
         uncordon_node(node_name)
@@ -76,9 +75,6 @@ def get_all_pods_on_node(self, node_name):
     def check_node_cordoned(self, node_name):
         check_node_cordoned(node_name)
 
-    def force_drain_node_and_wait(self, node_name, duration):
-        return force_drain_node_and_wait(node_name, duration)
-
     def check_drain_process_not_completed(self, drain_process):
         return check_drain_process_not_completed(drain_process)
 
@@ -88,8 +84,5 @@ def check_drain_process_completed(self, drain_process):
     def get_instance_manager_on_node(self, node_name):
         return get_instance_manager_on_node(node_name)
 
-    def get_drain_process_error_log(self, drain_process):
-        return get_drain_process_error_log(drain_process)
-
     def check_instance_manager_pdb_not_exist(self, instance_manager):
         return check_instance_manager_pdb_not_exist(instance_manager)
\ No newline at end of file
diff --git a/e2e/libs/utility/utility.py b/e2e/libs/utility/utility.py
index c1e1e469c..2381ded36 100644
--- a/e2e/libs/utility/utility.py
+++ b/e2e/libs/utility/utility.py
@@ -86,6 +86,10 @@ def subprocess_exec_cmd(cmd):
     logging(f"Executed command {cmd} with result {res}")
     return res
 
+def subprocess_exec_cmd_with_timeout(cmd, timeout):
+    res = subprocess.check_output(cmd, timeout=timeout)
+    logging(f"Executed command {cmd} with result {res}")
+    return res
 
 def wait_for_cluster_ready():
     core_api = client.CoreV1Api()
diff --git a/e2e/tests/negative/node_drain.robot b/e2e/tests/negative/node_drain.robot
index 1e689b114..649a8eae6 100644
--- a/e2e/tests/negative/node_drain.robot
+++ b/e2e/tests/negative/node_drain.robot
@@ -190,10 +190,8 @@ Stopped replicas on deleted nodes should not be counted as healthy replicas when
     And Wait for volume 0 detached
     And Power off node 1
 
-    When Force drain node 2 and wait for 90 second
-    And The drain process not completed
-    And Check instance-manager pod is running on node 2
-    And Drain logs should contain    log=error when evicting pods/\"${instance_manager_name}
+    When Force drain node 2 and expect failure
+    And Check instance-manager pod is running on node 2
     And Check volume 0 replica on node 2 exist
 
 Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
@@ -225,10 +223,9 @@ Setting Allow Node Drain with the Last Healthy Replica protects the last healthy
     And Wait for volume 0 detached
     And Power off node 1
 
-    When Force drain node 2 and wait for 90 second
-    And The drain process not completed
+    When Force drain node 2 and expect failure
     And Check instance-manager pod is running on node 2
 
     When Set setting node-drain-policy to always-allow
-    And The drain process completed
+    And Force drain node 2 and expect success
     And Check PDB not exist    instance_manger=${instance_manager_name}