Commit: minor fix
chriscchien committed Sep 25, 2024
1 parent e0353ac commit caef9ab
Showing 6 changed files with 45 additions and 64 deletions.
18 changes: 8 additions & 10 deletions e2e/keywords/k8s.resource
@@ -8,7 +8,7 @@ Library    ../libs/keywords/host_keywords.py
Library    ../libs/keywords/node_keywords.py

*** Variables ***

${DRAIN_TIMEOUT}    90

*** Keywords ***
Stop volume node kubelet of ${workload_kind} ${workload_id} for ${duration} seconds
@@ -87,25 +87,23 @@ Check node ${node_id} cordoned
    ${node_name} =    get_node_by_index    ${node_id}
    check_node_cordoned    ${node_name}

Force drain node ${node_id} and wait for ${duration} second
    [Arguments]    ${dataEngine}
Force drain node ${node_id}
    ${duration} =    90
    ${drained_node} =    get_node_by_index    ${node_id}
    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}    ${dataEngine}
    ${drain_process} =    force_drain_node_and_wait    ${drained_node}    ${duration}
    Set Test Variable    ${drain_process}
    Set Test Variable    ${instance_manager_name}
    ${instance_manager_name} =    get_instance_manager_on_node    ${drained_node}
    ${drain_logs}    ${completed} =    force_drain_node_with_timeout    ${drained_node}    ${DRAIN_TIMEOUT}
    Set Test Variable    ${drain_logs}
    Set Test Variable    ${completed}
    Set Test Variable    ${drained_node}

The drain process not completed
    check_drain_process_not_completed    ${drain_process}
    Should Be Equal    ${completed}    ${False}

The drain process completed
    wait_for_all_pods_evicted    ${drained_node}
    check_drain_process_completed    ${drain_process}

Drain logs should contain
    [Arguments]    ${log}
    ${drain_logs} =    get_drain_process_error_log    ${drain_process}
    Should Contain    ${drain_logs}    ${log}

Check PDB not exist
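For orientation, a minimal Python sketch (not part of the diff above) of the flow these reworked keywords now express: the drain outcome is captured once as a log string plus a completion flag, and the later assertion keywords only inspect those two values instead of a live process handle. Here drain_node is a hypothetical stand-in for force_drain_node_with_timeout, and the eviction-error prefix mirrors the string the "Drain logs should contain" step checks for.

DRAIN_TIMEOUT = 90  # mirrors ${DRAIN_TIMEOUT} in k8s.resource

def blocked_drain_scenario(drain_node, drained_node, instance_manager_name):
    # drain_node(node, timeout) -> (log, completed), like force_drain_node_with_timeout.
    drain_logs, completed = drain_node(drained_node, DRAIN_TIMEOUT)
    # "The drain process not completed" is now a plain flag check ...
    assert completed is False
    # ... and "Drain logs should contain" is a substring check on kubectl's stderr.
    assert f'error when evicting pods/"{instance_manager_name}' in drain_logs
    return drain_logs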
25 changes: 11 additions & 14 deletions e2e/libs/k8s/k8s.py
@@ -10,9 +10,8 @@
from utility.utility import subprocess_exec_cmd
from utility.utility import logging
from utility.utility import get_retry_count_and_interval
from utility.utility import check_popen_process_not_completed
from utility.utility import check_popen_process_completed
from utility.utility import get_popen_process_error_log
from utility.utility import subprocess_run_cmd
from robot.libraries.BuiltIn import BuiltIn

async def restart_kubelet(node_name, downtime_in_sec=10):
    manifest = new_pod_manifest(
@@ -81,7 +80,12 @@ def check_node_cordoned(node_name):
    node = api.read_node(node_name)
    assert node.spec.unschedulable is True, f"node {node_name} is not cordoned."

def force_drain_node_and_wait(node_name, duration):
def force_drain_node_with_timeout(node_name, timeout):
    exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data", f"--timeout={int(timeout)}s"]
    log, completed = subprocess_run_cmd(exec_cmd)
    return log, completed

'''
    _, retry_interval = get_retry_count_and_interval()
    exec_cmd = ["kubectl", "drain", node_name, "--force", "--ignore-daemonsets", "--delete-emptydir-data"]
    drain_process = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -93,17 +97,10 @@ def force_drain_node_and_wait(node_name, duration):
        time.sleep(retry_interval)
    return drain_process
'''
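The rewritten helper leans on kubectl's own --timeout flag rather than polling a background process: kubectl drain keeps retrying evictions that a PodDisruptionBudget blocks, writes "error when evicting pods/..." lines to stderr, and exits non-zero once the timeout elapses. A minimal sketch of that behaviour, assuming kubectl is on the PATH and using a hypothetical node name:

import subprocess

cmd = [
    "kubectl", "drain", "worker-2",  # "worker-2" is a hypothetical node name
    "--force", "--ignore-daemonsets", "--delete-emptydir-data",
    "--timeout=90s",
]
res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
completed = res.returncode == 0
if not completed:
    # The eviction errors land on stderr, which is what the Robot keyword
    # "Drain logs should contain" searches through.
    print(res.stderr.decode("utf-8"))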

def check_drain_process_not_completed(drain_process):
    check_popen_process_not_completed(drain_process)

def check_drain_process_completed(drain_process):
    check_popen_process_completed(drain_process)

def get_drain_process_error_log(drain_process):
    return get_popen_process_error_log(drain_process)

def get_instance_manager_on_node(node_name, data_engine):
def get_instance_manager_on_node(node_name):
    data_engine = BuiltIn().get_variable_value("${DATA_ENGINE}")
    pods = get_all_pods_on_node(node_name)
    for pod in pods:
        labels = pod.metadata.labels
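A note on get_instance_manager_on_node: instead of threading data_engine through every keyword, it now reads the Robot variable ${DATA_ENGINE} through BuiltIn. A small hedged sketch of that pattern (the fallback value here is an assumption for illustration, not something the commit sets):

from robot.libraries.BuiltIn import BuiltIn

def get_data_engine(default="v1"):
    # get_variable_value returns None when ${DATA_ENGINE} is not set in the
    # current Robot run, so an explicit fallback avoids passing None onward.
    value = BuiltIn().get_variable_value("${DATA_ENGINE}")
    return value if value is not None else default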
16 changes: 5 additions & 11 deletions e2e/libs/keywords/k8s_keywords.py
@@ -7,12 +7,9 @@
from k8s.k8s import wait_all_pods_evicted
from k8s.k8s import get_all_pods_on_node
from k8s.k8s import check_node_cordoned
from k8s.k8s import force_drain_node_and_wait
from k8s.k8s import check_drain_process_not_completed
from k8s.k8s import force_drain_node_with_timeout
from k8s.k8s import get_instance_manager_on_node
from k8s.k8s import get_drain_process_error_log
from k8s.k8s import check_instance_manager_pdb_not_exist
from k8s.k8s import check_drain_process_completed
from utility.utility import logging
from node import Node

@@ -76,17 +73,14 @@ def get_all_pods_on_node(self, node_name):
    def check_node_cordoned(self, node_name):
        check_node_cordoned(node_name)

    def force_drain_node_and_wait(self, node_name, duration):
        return force_drain_node_and_wait(node_name, duration)
    def force_drain_node_with_timeout(self, node_name, duration):
        return force_drain_node_with_timeout(node_name, duration)

    def check_drain_process_not_completed(self, drain_process):
        return check_drain_process_not_completed(drain_process)

    def check_drain_process_completed(self, drain_process):
        return check_drain_process_completed(drain_process)

    def get_instance_manager_on_node(self, node_name, data_engine):
        return get_instance_manager_on_node(node_name, data_engine)
    def get_instance_manager_on_node(self, node_name):
        return get_instance_manager_on_node(node_name)

    def get_drain_process_error_log(self, drain_process):
        return get_drain_process_error_log(drain_process)
36 changes: 14 additions & 22 deletions e2e/libs/utility/utility.py
@@ -87,6 +87,20 @@ def subprocess_exec_cmd(cmd):
    return res


def subprocess_run_cmd(cmd):
    # Unlike subprocess_exec_cmd, a non-zero exit code is not fatal here: the
    # caller gets the captured error log and completed=False instead. A kubectl
    # drain that hits its own --timeout exits non-zero and lands in the except
    # branch, so no subprocess-level timeout is needed.
    try:
        res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        log = res.stdout.decode('utf-8')
        logging(f"Executed command {cmd} completed and result= {log}")
        completed = True
    except subprocess.CalledProcessError as e:
        log = e.stderr.decode('utf-8')
        logging(f"Executed command {cmd} did not complete and log= {log}")
        completed = False
    return log, completed
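A short usage sketch for the new helper (an illustration assuming kubectl is on the PATH, not code from the repository): any command that exits non-zero yields completed=False with its stderr as the log, which is exactly the shape force_drain_node_with_timeout passes back to the Robot layer.

# "no-such-resource" is an intentionally invalid resource type, so kubectl
# exits non-zero and the helper reports completed=False with the stderr text.
log, completed = subprocess_run_cmd(["kubectl", "get", "no-such-resource"])
if not completed:
    logging(f"command failed as expected, stderr:\n{log}")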


def wait_for_cluster_ready():
    core_api = client.CoreV1Api()
    retry_count, retry_interval = get_retry_count_and_interval()
@@ -285,25 +299,3 @@ def get_name_suffix(*args):
        if arg:
            suffix += f"-{arg}"
    return suffix


def check_popen_process_not_completed(process):
    assert process.poll() is None, f"process {process} terminated which is not expected."


def check_popen_process_completed(process):
    retry_count, retry_interval = get_retry_count_and_interval()
    for i in range(retry_count):
        if process.poll() is not None:
            return
        time.sleep(retry_interval)
    assert process.poll() is not None, f"process {process} not terminated which is not expected."


def get_popen_process_error_log(drain_process):
    if drain_process.poll() is None:
        drain_process.terminate()

    stdout, stderr = drain_process.communicate()
    logging(f"{stderr.decode('utf-8')}")
    return stderr.decode('utf-8')
2 changes: 1 addition & 1 deletion e2e/requirements.txt
@@ -6,4 +6,4 @@ kubernetes==27.2.0
requests==2.32.3
boto3==1.35.19
pyyaml==6.0.2
minio==5.0.10
minio==5.0.10
12 changes: 6 additions & 6 deletions e2e/tests/negative/node_drain.robot
@@ -182,22 +182,22 @@ Stopped replicas on deleted nodes should not be counted as healthy replicas when
    [Teardown]    Cleanup test resources include off nodes
    Given Disable node 0 scheduling
    And Set setting node-drain-policy to block-if-contains-last-replica
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2    dataEngine=${DATA_ENGINE}
    And Attach volume 0 to node 1
    And Wait for volume 0 healthy
    And Write data to volume 0
    And Detach volume 0 from attached node
    And Wait for volume 0 detached
    And Power off node 1

    When Force drain node 2 and wait for 90 second    dataEngine=${DATA_ENGINE}
    When Force drain node 2
    And The drain process not completed
    And Check instance-manager pod is running on node 2    dataEngine=${DATA_ENGINE}
    And Drain logs should contain    log=error when evicting pods/\"${instance_manager_name}
    And Check volume 0 replica on node 2 exist

Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    [Documentation]    Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    [Documentation]    Setting Allow Node Drain with the Last Healthy Replica protects the last healthy replica with Pod Disruption Budget (PDB)
    ...    Related Issue:
    ...    - https://github.com/longhorn/longhorn/issues/2237
    ...
@@ -217,17 +217,17 @@ Setting Allow Node Drain with the Last Healthy Replica protects the last healthy
    [Teardown]    Cleanup test resources include off nodes
    Given Disable node 0 scheduling
    And Set setting node-drain-policy to block-if-contains-last-replica
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2
    And Given Create volume 0 with    size=5Gi    numberOfReplicas=2    dataEngine=${DATA_ENGINE}
    And Attach volume 0 to node 1
    And Wait for volume 0 healthy
    And Write data to volume 0
    And Detach volume 0 from attached node
    And Wait for volume 0 detached
    And Power off node 1

    When Force drain node 2 and wait for 90 second    dataEngine=${DATA_ENGINE}
    When Force drain node 2
    And The drain process not completed
    And Check instance-manager pod is running on node 2    dataEngine=${DATA_ENGINE}
    And Check instance-manager pod is running on node 2

    When Set setting node-drain-policy to always-allow
    And The drain process completed
