Skip to content

Commit

Permalink
Increase timeout waiting for the machine to delete (#10789)
Browse files Browse the repository at this point in the history
Signed-off-by: Itzhak Kave <ikave@ibm.com>
Co-authored-by: Itzhak Kave <ikave@ibm.com>
  • Loading branch information
yitzhak12 and Itzhak Kave authored Nov 14, 2024
1 parent 26f6240 commit 777d1c7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
10 changes: 6 additions & 4 deletions ocs_ci/ocs/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,14 @@ def get_machines(machine_type=constants.WORKER_MACHINE):
return machines


def delete_machine(machine_name):
def delete_machine(machine_name, wait=True, timeout=600):
"""
Deletes a machine
Args:
machine_name (str): Name of the machine you want to delete
wait (bool): Wait for the machine to be deleted
timeout (int): Time to wait for the machine to be deleted
Raises:
CommandFailed: In case yaml_file and resource_name weren't provided
Expand All @@ -107,7 +109,7 @@ def delete_machine(machine_name):
kind="machine", namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE
)
log.info(f"Deleting machine {machine_name}")
machine_obj.delete(resource_name=machine_name)
machine_obj.delete(resource_name=machine_name, wait=wait, timeout=timeout)


def get_machine_type(machine_name):
Expand Down Expand Up @@ -167,7 +169,7 @@ def delete_machine_and_check_state_of_new_spinned_machine(machine_name):
machine_type = get_machine_type(machine_name)
machine_list = get_machines(machine_type=machine_type)
initial_machine_names = [machine.name for machine in machine_list]
delete_machine(machine_name)
delete_machine(machine_name, wait=False)
new_machine_list = get_machines(machine_type=machine_type)
new_machine = [
machine
Expand All @@ -181,7 +183,7 @@ def delete_machine_and_check_state_of_new_spinned_machine(machine_name):
condition=constants.STATUS_RUNNING,
resource_name=new_machine_name,
column="PHASE",
timeout=600,
timeout=900,
sleep=30,
)
log.info(f"{new_machine_name} is in {constants.STATUS_RUNNING} state")
Expand Down
28 changes: 24 additions & 4 deletions ocs_ci/ocs/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,10 +977,18 @@ def delete_and_create_osd_node_ipi(osd_node_name):
new_machine_name = machine.delete_machine_and_check_state_of_new_spinned_machine(
machine_name
)
machineset_name = machine.get_machineset_from_machine_name(new_machine_name)
log.info("Waiting for new worker node to be in ready state")
machine.wait_for_new_node_to_be_ready(machineset_name)
new_node_name = get_node_from_machine_name(new_machine_name)
if config.ENV_DATA.get("worker_replicas") == 0:
new_node_name = get_node_from_machine_name(new_machine_name)
log.info("Waiting for new worker node to be in ready state")
wait_for_nodes_status(
[new_node_name], constants.STATUS_READY, timeout=600, sleep=20
)
else:
machineset_name = machine.get_machineset_from_machine_name(new_machine_name)
log.info("Waiting for new worker node to be in ready state")
machine.wait_for_new_node_to_be_ready(machineset_name)
new_node_name = get_node_from_machine_name(new_machine_name)

if not is_node_labeled(new_node_name):
log.info("Adding ocs label to newly created worker node")
node_obj = ocp.OCP(kind="node")
Expand All @@ -989,6 +997,18 @@ def delete_and_create_osd_node_ipi(osd_node_name):
)
log.info(f"Successfully labeled {new_node_name} with OCS storage label")

log.info(f"Wait for the old machine {machine_name} to be deleted")
if config.ENV_DATA.get("worker_replicas") == 0:
timeout = 1200
else:
timeout = 420
ocp_obj = OCP(kind="machine", namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE)
ocp_obj.wait_for_delete(
resource_name=machine_name,
timeout=timeout,
sleep=30,
ignore_command_failed_exception=True,
)
return new_node_name


Expand Down
15 changes: 14 additions & 1 deletion ocs_ci/ocs/ocp.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,7 +892,13 @@ def wait_for_resource(

return False

def wait_for_delete(self, resource_name="", timeout=60, sleep=3):
def wait_for_delete(
self,
resource_name="",
timeout=60,
sleep=3,
ignore_command_failed_exception=False,
):
"""
Wait for a resource to be deleted
Expand All @@ -901,6 +907,9 @@ def wait_for_delete(self, resource_name="", timeout=60, sleep=3):
for (e.g.my-pv1)
timeout (int): Time in seconds to wait
sleep (int): Sampling time in seconds
ignore_command_failed_exception (bool): If True, a CommandFailed exception other than
"NotFound" is logged as a warning and ignored, and the wait continues until the given timeout.
If False, any CommandFailed exception other than "NotFound" is raised.
Raises:
CommandFailed: If failed to verify the resource deletion
Expand All @@ -920,6 +929,10 @@ def wait_for_delete(self, resource_name="", timeout=60, sleep=3):
if "NotFound" in str(ex):
log.info(f"{self.kind} {resource_name} got deleted successfully")
return True
elif ignore_command_failed_exception:
log.warning(
f"Failed to get the resource {resource_name} due to the exception: {str(ex)}"
)
else:
raise ex

Expand Down

0 comments on commit 777d1c7

Please sign in to comment.