Commit 9c6dc80 ("minor fix")
chriscchien committed Sep 26, 2024
1 parent 6a59971
Showing 5 changed files with 31 additions and 44 deletions.
27 changes: 18 additions & 9 deletions e2e/keywords/k8s.resource
@@ -8,7 +8,7 @@ Library ../libs/keywords/host_keywords.py
Library ../libs/keywords/node_keywords.py

*** Variables ***

${DRAIN_TIMEOUT} 90

*** Keywords ***
Stop volume node kubelet of ${workload_kind} ${workload_id} for ${duration} seconds
@@ -87,26 +87,35 @@ Check node ${node_id} cordoned
${node_name} = get_node_by_index ${node_id}
check_node_cordoned ${node_name}

Force drain node ${node_id} and wait for ${duration} second
Force drain node ${node_id} and expect failure
${drained_node} = get_node_by_index ${node_id}
${instance_manager_name} = get_instance_manager_on_node ${drained_node}
Run Keyword And Expect Error * force drain node ${drained_node}
Set Test Variable ${instance_manager_name}
Set Test Variable ${drained_node}

Force drain node ${node_id} and expect success
${drained_node} = get_node_by_index ${node_id}
${instance_manager_name} = get_instance_manager_on_node ${drained_node}
${drain_process} = force_drain_node_and_wait ${drained_node} ${duration}
Set Test Variable ${drain_process}
force drain node ${drained_node}
Set Test Variable ${instance_manager_name}
Set Test Variable ${drained_node}

#Force drain node ${node_id} and wait for ${duration} second
# ${drained_node} = get_node_by_index ${node_id}
# ${instance_manager_name} = get_instance_manager_on_node ${drained_node}
# ${drain_process} = force_drain_node_and_wait ${drained_node} ${duration}
# Set Test Variable ${drain_process}
# Set Test Variable ${instance_manager_name}
# Set Test Variable ${drained_node}

The drain process not completed
check_drain_process_not_completed ${drain_process}

The drain process completed
wait_for_all_pods_evicted ${drained_node}
check_drain_process_completed ${drain_process}

Drain logs should contain
[Arguments] ${log}
${drain_logs} = get_drain_process_error_log ${drain_process}
Should Contain ${drain_logs} ${log}

Check PDB not exist
[Arguments] ${instance_manager}
check_instance_manager_pdb_not_exist ${instance_manager}
22 changes: 3 additions & 19 deletions e2e/libs/k8s/k8s.py
@@ -12,7 +12,7 @@
from utility.utility import get_retry_count_and_interval
from utility.utility import check_popen_process_not_completed
from utility.utility import check_popen_process_completed
from utility.utility import get_popen_process_error_log
from utility.utility import subprocess_exec_cmd_with_timeout
from robot.libraries.BuiltIn import BuiltIn

async def restart_kubelet(node_name, downtime_in_sec=10):
@@ -37,9 +37,9 @@ def drain_node(node_name):
exec_cmd = ["kubectl", "drain", node_name, "--ignore-daemonsets", "--delete-emptydir-data"]
res = subprocess_exec_cmd(exec_cmd)

def force_drain_node(node_name):
def force_drain_node(node_name, timeout):
exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
res = subprocess_exec_cmd(exec_cmd)
res = subprocess_exec_cmd_with_timeout(exec_cmd, timeout)

def cordon_node(node_name):
exec_cmd = ["kubectl", "cordon", node_name]
@@ -82,28 +82,12 @@ def check_node_cordoned(node_name):
node = api.read_node(node_name)
assert node.spec.unschedulable is True, f"node {node_name} is not cordoned."

def force_drain_node_and_wait(node_name, duration):
_, retry_interval = get_retry_count_and_interval()
exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
drain_process = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

for i in range(int(duration)):
logging(f"Performing {exec_cmd} and wait... counts={i}")
if drain_process.poll() is not None:
raise AssertionError(f"Drain node {node_name} completed, but it was expected not to complete!")
time.sleep(retry_interval)

return drain_process

def check_drain_process_not_completed(drain_process):
check_popen_process_not_completed(drain_process)

def check_drain_process_completed(drain_process):
check_popen_process_completed(drain_process)

def get_drain_process_error_log(drain_process):
return get_popen_process_error_log(drain_process)

def get_instance_manager_on_node(node_name):
data_engine = BuiltIn().get_variable_value("${DATA_ENGINE}")
pods = get_all_pods_on_node(node_name)
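With the timeout threaded through to kubectl, force_drain_node now fails on its own when eviction stays blocked (for example by an instance-manager PodDisruptionBudget), replacing the removed force_drain_node_and_wait polling loop. Below is a minimal sketch, not part of the commit, of the contract the Robot keywords rely on; the node name is illustrative and kubectl is assumed to be on the PATH:

import subprocess

def force_drain_node_sketch(node_name, timeout):
    # Same command the library builds; check_output raises TimeoutExpired when the
    # drain is still blocked once the timeout elapses, or CalledProcessError when
    # kubectl exits non-zero.
    cmd = ["kubectl", "drain", node_name, "--force",
           "--ignore-daemonsets", "--delete-emptydir-data"]
    return subprocess.check_output(cmd, timeout=timeout)

if __name__ == "__main__":
    try:
        force_drain_node_sketch("worker-2", timeout=90)  # hypothetical node name
        print("drain completed")                         # "... and expect success" path
    except subprocess.TimeoutExpired:
        print("drain did not complete in time")          # "... and expect failure" path
    except subprocess.CalledProcessError as err:
        print(f"drain failed: {err.output}")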
11 changes: 2 additions & 9 deletions e2e/libs/keywords/k8s_keywords.py
@@ -7,10 +7,8 @@
from k8s.k8s import wait_all_pods_evicted
from k8s.k8s import get_all_pods_on_node
from k8s.k8s import check_node_cordoned
from k8s.k8s import force_drain_node_and_wait
from k8s.k8s import check_drain_process_not_completed
from k8s.k8s import get_instance_manager_on_node
from k8s.k8s import get_drain_process_error_log
from k8s.k8s import check_instance_manager_pdb_not_exist
from k8s.k8s import check_drain_process_completed
from utility.utility import logging
@@ -53,7 +51,8 @@ def drain_node(self, node_name):
drain_node(node_name)

def force_drain_node(self, node_name):
force_drain_node(node_name)
timeout = int(BuiltIn().get_variable_value("${DRAIN_TIMEOUT}", default="90"))
force_drain_node(node_name, timeout)

def uncordon_node(self, node_name):
uncordon_node(node_name)
@@ -76,9 +75,6 @@ def get_all_pods_on_node(self, node_name):
def check_node_cordoned(self, node_name):
check_node_cordoned(node_name)

def force_drain_node_and_wait(self, node_name, duration):
return force_drain_node_and_wait(node_name, duration)

def check_drain_process_not_completed(self, drain_process):
return check_drain_process_not_completed(drain_process)

@@ -88,8 +84,5 @@ def check_drain_process_completed(self, drain_process):
def get_instance_manager_on_node(self, node_name):
return get_instance_manager_on_node(node_name)

def get_drain_process_error_log(self, drain_process):
return get_drain_process_error_log(drain_process)

def check_instance_manager_pdb_not_exist(self, instance_manager):
return check_instance_manager_pdb_not_exist(instance_manager)
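The keyword layer now resolves the drain timeout from the Robot variable table instead of taking a duration argument. A hedged sketch of that lookup pattern, assuming it runs inside an executing Robot suite where ${DRAIN_TIMEOUT} is defined in k8s.resource (outside of a test run, get_variable_value raises RobotNotRunningError):

from robot.libraries.BuiltIn import BuiltIn

def resolve_drain_timeout():
    # Falls back to "90" when the suite does not define ${DRAIN_TIMEOUT},
    # mirroring the default used by force_drain_node above.
    value = BuiltIn().get_variable_value("${DRAIN_TIMEOUT}", default="90")
    return int(value)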
4 changes: 4 additions & 0 deletions e2e/libs/utility/utility.py
@@ -86,6 +86,10 @@ def subprocess_exec_cmd(cmd):
logging(f"Executed command {cmd} with result {res}")
return res

def subprocess_exec_cmd_with_timeout(cmd, timeout):
res = subprocess.check_output(cmd, timeout=timeout)
logging(f"Executed command {cmd} with result {res}")
return res

def wait_for_cluster_ready():
core_api = client.CoreV1Api()
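One detail about the new helper: subprocess.check_output accepts the timeout only as a keyword argument, because it is declared after *popenargs; passed positionally the value would reach Popen as bufsize and no timeout would be enforced. A small standalone check of the keyword form, assuming a POSIX sleep binary is available:

import subprocess

try:
    # Sleeps longer than the timeout, so TimeoutExpired is raised after ~1 second.
    subprocess.check_output(["sleep", "5"], timeout=1)
except subprocess.TimeoutExpired as err:
    print(f"command timed out after {err.timeout}s, as expected")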
11 changes: 4 additions & 7 deletions e2e/tests/negative/node_drain.robot
@@ -190,10 +190,8 @@ Stopped replicas on deleted nodes should not be counted as healthy replicas when
And Wait for volume 0 detached
And Power off node 1

When Force drain node 2 and wait for 90 second
And The drain process not completed
And Check instance-manager pod is running on node 2
And Drain logs should contain log=error when evicting pods/\"${instance_manager_name}
When Force drain node 2 and expect failure
And Check instance-manager pod is running on node 2
And Check volume 0 replica on node 2 exist

Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
Expand Down Expand Up @@ -225,10 +223,9 @@ Setting Allow Node Drain with the Last Healthy Replica protects the last healthy
And Wait for volume 0 detached
And Power off node 1

When Force drain node 2 and wait for 90 second
And The drain process not completed
When Force drain node 2 and expect failure
And Check instance-manager pod is running on node 2

When Set setting node-drain-policy to always-allow
And The drain process completed
And Force drain node 2 and expect success
And Check PDB not exist instance_manager=${instance_manager_name}
