diff --git a/ocs_ci/ocs/machine.py b/ocs_ci/ocs/machine.py index 950b81fd9bc..6240f9ab418 100644 --- a/ocs_ci/ocs/machine.py +++ b/ocs_ci/ocs/machine.py @@ -93,12 +93,14 @@ def get_machines(machine_type=constants.WORKER_MACHINE): return machines -def delete_machine(machine_name): +def delete_machine(machine_name, wait=True, timeout=600): """ Deletes a machine Args: machine_name (str): Name of the machine you want to delete + wait (bool): Wait for the machine to be deleted + timeout (int): Time to wait for the machine to be deleted Raises: CommandFailed: In case yaml_file and resource_name wasn't provided @@ -107,7 +109,7 @@ def delete_machine(machine_name): kind="machine", namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE ) log.info(f"Deleting machine {machine_name}") - machine_obj.delete(resource_name=machine_name) + machine_obj.delete(resource_name=machine_name, wait=wait, timeout=timeout) def get_machine_type(machine_name): @@ -167,7 +169,7 @@ def delete_machine_and_check_state_of_new_spinned_machine(machine_name): machine_type = get_machine_type(machine_name) machine_list = get_machines(machine_type=machine_type) initial_machine_names = [machine.name for machine in machine_list] - delete_machine(machine_name) + delete_machine(machine_name, wait=False) new_machine_list = get_machines(machine_type=machine_type) new_machine = [ machine @@ -181,7 +183,7 @@ def delete_machine_and_check_state_of_new_spinned_machine(machine_name): condition=constants.STATUS_RUNNING, resource_name=new_machine_name, column="PHASE", - timeout=600, + timeout=900, sleep=30, ) log.info(f"{new_machine_name} is in {constants.STATUS_RUNNING} state") diff --git a/ocs_ci/ocs/node.py b/ocs_ci/ocs/node.py index 4b33bbb2d7d..02918fe05ab 100644 --- a/ocs_ci/ocs/node.py +++ b/ocs_ci/ocs/node.py @@ -931,10 +931,18 @@ def delete_and_create_osd_node_ipi(osd_node_name): new_machine_name = machine.delete_machine_and_check_state_of_new_spinned_machine( machine_name ) - machineset_name = machine.get_machineset_from_machine_name(new_machine_name) - log.info("Waiting for new worker node to be in ready state") - machine.wait_for_new_node_to_be_ready(machineset_name) - new_node_name = get_node_from_machine_name(new_machine_name) + if config.ENV_DATA.get("worker_replicas") == 0: + new_node_name = get_node_from_machine_name(new_machine_name) + log.info("Waiting for new worker node to be in ready state") + wait_for_nodes_status( + [new_node_name], constants.STATUS_READY, timeout=600, sleep=20 + ) + else: + machineset_name = machine.get_machineset_from_machine_name(new_machine_name) + log.info("Waiting for new worker node to be in ready state") + machine.wait_for_new_node_to_be_ready(machineset_name) + new_node_name = get_node_from_machine_name(new_machine_name) + if not is_node_labeled(new_node_name): log.info("Adding ocs label to newly created worker node") node_obj = ocp.OCP(kind="node") @@ -943,6 +951,18 @@ def delete_and_create_osd_node_ipi(osd_node_name): ) log.info(f"Successfully labeled {new_node_name} with OCS storage label") + log.info(f"Wait for the old machine {machine_name} to be deleted") + if config.ENV_DATA.get("worker_replicas") == 0: + timeout = 1200 + else: + timeout = 420 + ocp_obj = OCP(kind="machine", namespace=constants.OPENSHIFT_MACHINE_API_NAMESPACE) + ocp_obj.wait_for_delete( + resource_name=machine_name, + timeout=timeout, + sleep=30, + ignore_command_failed_exception=True, + ) return new_node_name diff --git a/ocs_ci/ocs/ocp.py b/ocs_ci/ocs/ocp.py index 70031391615..5c32064c948 100644 --- a/ocs_ci/ocs/ocp.py +++ b/ocs_ci/ocs/ocp.py @@ -892,7 +892,13 @@ def wait_for_resource( return False - def wait_for_delete(self, resource_name="", timeout=60, sleep=3): + def wait_for_delete( + self, + resource_name="", + timeout=60, + sleep=3, + ignore_command_failed_exception=False, + ): """ Wait for a resource to be deleted @@ -901,6 +907,9 @@ def wait_for_delete(self, resource_name="", timeout=60, sleep=3): for (e.g.my-pv1) timeout (int): Time in seconds to wait sleep (int): Sampling time in seconds + ignore_command_failed_exception (bool): If True, it will ignore the CommandFailed Exception + if it differs from the "NotFound" exception and wait until the given timeout. If False, it will + raise the CommandFailed Exception if it differs from the "NotFound" exception. Raises: CommandFailed: If failed to verify the resource deletion @@ -920,6 +929,10 @@ def wait_for_delete(self, resource_name="", timeout=60, sleep=3): if "NotFound" in str(ex): log.info(f"{self.kind} {resource_name} got deleted successfully") return True + elif ignore_command_failed_exception: + log.warning( + f"Failed to get the resource {resource_name} due to the exception: {str(ex)}" + ) else: raise ex