From 046c45cb4a611c8a4ba21992f1affb6b664dc53c Mon Sep 17 00:00:00 2001
From: Murali Krishnasamy
Date: Fri, 3 Nov 2023 13:16:45 -0400
Subject: [PATCH] rebased and squashed

---
 libs/platforms/platform.py                    |   5 +-
 libs/platforms/rosa/hypershift/hypershift.py  |   4 +-
 libs/platforms/rosa/rosa.py                   |  41 +-
 libs/platforms/rosa/terraform/files/main.tf   |  20 +-
 libs/platforms/rosa/terraform/files/output.tf |  15 -
 .../rosa/terraform/files/variables.tf         |  15 +
 libs/platforms/rosa/terraform/terraform.py    | 401 +++++++++++++-----
 libs/utils.py                                 |  11 +-
 rosa-burner.py                                |   2 +-
 9 files changed, 354 insertions(+), 160 deletions(-)
 delete mode 100644 libs/platforms/rosa/terraform/files/output.tf

diff --git a/libs/platforms/platform.py b/libs/platforms/platform.py
index c33786c..0a71ba0 100644
--- a/libs/platforms/platform.py
+++ b/libs/platforms/platform.py
@@ -21,6 +21,9 @@ def __init__(self, arguments, logging, utils, es):
 
         self.environment["platform"] = arguments["platform"]
 
+        if arguments["subplatform"]:
+            self.environment["subplatform"] = arguments["subplatform"]
+
         self.environment["ocm_url"] = arguments["ocm_url"]
 
         self.environment["ocm_token"] = arguments["ocm_token"]
@@ -184,7 +187,7 @@ def create_cluster(self, platform, cluster_name):
     def delete_cluster(self, platform, cluster_name):
         pass
 
-    def platform_cleanup(self):
+    def platform_cleanup(self, platform=""):
         pass
 
     def watcher(self):
diff --git a/libs/platforms/rosa/hypershift/hypershift.py b/libs/platforms/rosa/hypershift/hypershift.py
index b21b26f..0b910ee 100644
--- a/libs/platforms/rosa/hypershift/hypershift.py
+++ b/libs/platforms/rosa/hypershift/hypershift.py
@@ -114,8 +114,8 @@ def _get_mc(self, cluster_id):
         )
         return json.loads(resp_out).get("management_cluster", None) if resp_code == 0 else None
 
-    def platform_cleanup(self):
-        super().platform_cleanup()
+    def platform_cleanup(self, platform=""):
+        super().platform_cleanup(platform)
         self.logging.info("Cleaning resources")
         # Delete Operator Roles
         self._delete_operator_roles() if self.environment[
diff --git a/libs/platforms/rosa/rosa.py b/libs/platforms/rosa/rosa.py
index d534a9f..0526cd1 100644
--- a/libs/platforms/rosa/rosa.py
+++ b/libs/platforms/rosa/rosa.py
@@ -199,8 +199,8 @@ def _delete_operator_roles(self):
         )
         return True
 
-    def platform_cleanup(self):
-        super().platform_cleanup()
+    def platform_cleanup(self, platform=""):
+        super().platform_cleanup(platform)
 
     def create_cluster(self, platform, cluster_name):
         super().create_cluster(platform, cluster_name)
@@ -240,25 +240,28 @@ def _preflight_wait(self, cluster_id, cluster_name):
                 self.logging.error(f"Exiting preflight times capturing on {cluster_name} cluster after capturing Ctrl-C")
                 return 0
             self.logging.info(f"Getting status for cluster {cluster_name}")
-            status_code, status_out, status_err = self.utils.subprocess_exec("rosa describe cluster -c " + cluster_id + " -o json", extra_params={"universal_newlines": True})
+            status_code, status_out, status_err = self.utils.subprocess_exec("rosa describe cluster -c " + cluster_id + " -o json", extra_params={"universal_newlines": True}, log_output=False)
             current_time = int(datetime.datetime.utcnow().timestamp())
-            try:
-                current_status = json.loads(status_out)["state"]
-            except Exception as err:
-                self.logging.error(f"Cannot load metadata for cluster {cluster_name}")
-                self.logging.error(err)
-                continue
-            if current_status != previous_status and previous_status != "":
-                return_data[previous_status] = current_time - start_time
-                start_time = current_time
-                self.logging.info(f"Cluster {cluster_name} moved from {previous_status} status to {current_status} status after {return_data[previous_status]} seconds")
-            if current_status == "installing":
-                self.logging.info(f"Cluster {cluster_name} is on installing status. Exiting preflights waiting...")
-                return return_data
+            if status_code != 0:
+                self.logging.debug("Cluster data not available yet, retrying..")
             else:
-                self.logging.debug(f"Cluster {cluster_name} on {current_status} status. Waiting 2 seconds until {datetime.datetime.fromtimestamp(start_time + 60 * 60)} for next check")
-                time.sleep(1)
-            previous_status = current_status
+                try:
+                    current_status = json.loads(status_out)["state"]
+                except Exception as err:
+                    self.logging.error(f"Cannot load metadata for cluster {cluster_name}")
+                    self.logging.error(err)
+                    continue
+                if current_status != previous_status and previous_status != "":
+                    return_data[previous_status] = current_time - start_time
+                    start_time = current_time
+                    self.logging.info(f"Cluster {cluster_name} moved from {previous_status} status to {current_status} status after {return_data[previous_status]} seconds")
+                if current_status == "installing":
+                    self.logging.info(f"Cluster {cluster_name} is on installing status. Exiting preflights waiting...")
+                    return return_data
+                else:
+                    self.logging.debug(f"Cluster {cluster_name} on {current_status} status. Waiting 2 seconds until {datetime.datetime.fromtimestamp(start_time + 60 * 60)} for next check")
+                    time.sleep(1)
+                previous_status = current_status
         self.logging.error(f"Cluster {cluster_name} on {current_status} status (not installing) after 60 minutes. Exiting preflight waiting...")
         return return_data
 
diff --git a/libs/platforms/rosa/terraform/files/main.tf b/libs/platforms/rosa/terraform/files/main.tf
index 6f865ce..9e99fa7 100644
--- a/libs/platforms/rosa/terraform/files/main.tf
+++ b/libs/platforms/rosa/terraform/files/main.tf
@@ -31,18 +31,6 @@ provider "rhcs" {
   url   = var.url
 }
 
-# Create managed OIDC config
-module "oidc_config" {
-  token                = var.token
-  url                  = var.url
-  source               = "./oidc_provider"
-  managed              = true
-  operator_role_prefix = var.operator_role_prefix
-  account_role_prefix  = var.account_role_prefix
-  tags                 = var.tags
-  path                 = var.path
-}
-
 locals {
   path = coalesce(var.path, "/")
   sts_roles = {
@@ -53,7 +41,7 @@ locals {
       worker_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role${local.path}${var.account_role_prefix}-Worker-Role"
     },
     operator_role_prefix = var.operator_role_prefix,
-    oidc_config_id = module.oidc_config.id
+    oidc_config_id = var.oidc_config_id
   }
 }
 
@@ -65,7 +53,8 @@ locals {
 }
 
 resource "rhcs_cluster_rosa_classic" "rosa_sts_cluster" {
-  name               = var.cluster_name
+  count              = var.clusters_per_apply
+  name               = "${var.cluster_name}-${format("%04d", var.loop_factor + count.index + 1)}"
   cloud_region       = var.cloud_region
   aws_account_id     = data.aws_caller_identity.current.account_id
   availability_zones = var.availability_zones
@@ -79,5 +68,6 @@ resource "rhcs_cluster_rosa_classic" "rosa_sts_cluster" {
     rosa_creator_arn = data.aws_caller_identity.current.arn
   }
   sts = local.sts_roles
-  wait_for_create_complete = true
+  wait_for_create_complete = false
+  disable_waiting_in_destroy = true
 }
diff --git a/libs/platforms/rosa/terraform/files/output.tf b/libs/platforms/rosa/terraform/files/output.tf
deleted file mode 100644
index 49ede51..0000000
--- a/libs/platforms/rosa/terraform/files/output.tf
+++ /dev/null
@@ -1,15 +0,0 @@
-output "oidc_config_id" {
-  value = module.oidc_config.id
-}
-
-output "oidc_endpoint_url" {
-  value = module.oidc_config.oidc_endpoint_url
-}
-
-output "thumbprint" {
-  value = module.oidc_config.thumbprint
-}
-
-output "cluster_id" {
-  value = rhcs_cluster_rosa_classic.rosa_sts_cluster.id
-}
diff --git a/libs/platforms/rosa/terraform/files/variables.tf b/libs/platforms/rosa/terraform/files/variables.tf
index 8116c1c..ee448b3 100644
--- a/libs/platforms/rosa/terraform/files/variables.tf
+++ b/libs/platforms/rosa/terraform/files/variables.tf
@@ -23,6 +23,21 @@ variable "cluster_name" {
   default = "rbur-000-0001"
 }
 
+variable "clusters_per_apply" {
+  type    = number
+  default = null
+}
+
+variable "loop_factor" {
+  type    = number
+  default = null
+}
+
+variable "oidc_config_id" {
+  type    = string
+  default = null
+}
+
 variable "cloud_region" {
   type    = string
   default = "us-east-2"
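
Side note on the naming scheme above (an illustrative sketch, not part of the patch; the seed value is made up): main.tf now creates var.clusters_per_apply cluster resources per apply and names them "${var.cluster_name}-${format("%04d", var.loop_factor + count.index + 1)}", so the Python side only needs to pass a loop_factor equal to the number of clusters already requested by earlier applies.

# Hypothetical illustration of the names one apply batch is expected to produce
def expected_cluster_names(seed, clusters_per_apply, loop_counter):
    loop_factor = loop_counter * clusters_per_apply  # same math as TF_VAR_loop_factor
    return [f"{seed}-{loop_factor + index + 1:04d}" for index in range(clusters_per_apply)]

# expected_cluster_names("rbur-abc", 2, 1) -> ["rbur-abc-0003", "rbur-abc-0004"]
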
"/terraform/terraform-init.log", {"cwd": self.environment["path"] + "/terraform"}) if terraform_code != 0: - self.logging.error(f"Failed to initialize terraform. Check {self.environment['path']}/terraform/init.log for more information") + self.logging.error(f"Failed to initialize terraform. Check {self.environment['path']}/terraform/terraform-init.log for more information") sys.exit("Exiting...") - def platform_cleanup(self): - super().platform_cleanup() + terraform_code, terraform_out, terraform_err = self.utils.subprocess_exec("terraform init", self.environment["path"] + "/terraform/oidc_provider/terraform-init.log", {"cwd": self.environment["path"] + "/terraform/oidc_provider"}) + if terraform_code != 0: + self.logging.error(f"Failed to initialize terraform. Check {self.environment['path']}/terraform/oidc_provider/terraform-init.log for more information") + sys.exit("Exiting...") - def delete_cluster(self, platform, cluster_name): - super().delete_cluster(platform, cluster_name) + def platform_cleanup(self, platform=""): + super().platform_cleanup(platform) + self.destroy_tf_template(platform, tf_module="oidc") - myenv = os.environ.copy() - myenv["TF_VAR_token"] = self.environment["ocm_token"] - myenv["TF_VAR_cloud_region"] = self.environment['aws']['region'] - myenv["TF_VAR_url"] = self.environment["ocm_url"] - myenv["TF_VAR_account_role_prefix"] = 'ManagedOpenShift' - myenv["TF_VAR_cluster_name"] = cluster_name - myenv["TF_VAR_operator_role_prefix"] = cluster_name -# myenv["TF_VAR_clusters_per_apply"] = str(self.environment['clusters_per_apply']) + def _oidc_tf_template(self, action, tf_path, myenv): + code, out, err = self.utils.subprocess_exec("terraform " + action + " --auto-approve -state=" + tf_path + "/terraform_oidc.tfstate ", tf_path + "/terraform_oidc_" + action.replace(" ", "") + ".log", {"cwd": self.environment['path'] + "/terraform/oidc_provider", 'preexec_fn': self.utils.disable_signals, "env": myenv}) + return code, out, err + + # creates templates based on the clusters_per_apply + # and apply them at given interval without wait for it to complete + def apply_tf_template(self, platform): + loop_counter = 0 + while loop_counter < platform.environment["clusters_per_apply_count"]: + tf_counter = 0 + self.logging.debug(platform.environment["clusters"]) + if self.utils.force_terminate: + loop_counter += 1 + else: + cluster_workers = int(platform.environment["workers"]) + + tf_name = platform.environment["cluster_name_seed"] + + try: + tf_path = platform.environment["path"] + "/" + "TF_" + tf_name + "-" + str(loop_counter * self.environment['clusters_per_apply']).zfill(4) + os.mkdir(tf_path) + + myenv = os.environ.copy() + myenv["TF_VAR_token"] = self.environment["ocm_token"] + myenv["TF_VAR_cloud_region"] = self.environment['aws']['region'] + myenv["TF_VAR_url"] = self.environment["ocm_url"] + myenv["TF_VAR_account_role_prefix"] = 'ManagedOpenShift' + myenv["TF_VAR_cluster_name"] = tf_name + myenv["TF_VAR_replicas"] = str(cluster_workers) + myenv["TF_VAR_operator_role_prefix"] = tf_name + "-" + str(loop_counter) + myenv["TF_VAR_clusters_per_apply"] = str(self.environment['clusters_per_apply']) + myenv["TF_VAR_loop_factor"] = str((loop_counter * self.environment['clusters_per_apply'])) + + # additional env for oidc_provider template + myenv["TF_VAR_managed"] = "true" + + self.logging.info(f"Applying OIDC template to create oidc_provider for cluster seed {tf_name} looping {loop_counter + 1}") + terraform_oidc_apply_code, terraform_oidc_apply_out, terraform_oidc_apply_err = 
self._oidc_tf_template("apply", tf_path, myenv) + if terraform_oidc_apply_code != 0: + self.logging.error(f"OIDC with seed {tf_name} looping {loop_counter + 1} terraform apply failed") + self.logging.debug(terraform_oidc_apply_out) + return 1 + else: + self.logging.info(f"Applied OIDC template successfully for cluster seed {tf_name} looping {loop_counter + 1}") + with open(tf_path + "/terraform_oidc.tfstate", "r") as terraform_state: + json_output = json.load(terraform_state) + oidc_id = json_output["outputs"]["id"]["value"] + + # Passing new OIDC ID to the cluster template + myenv["TF_VAR_oidc_config_id"] = oidc_id + + self.logging.info(f"Applying template to create {platform.environment['clusters_per_apply']} with cluster seed {tf_name} looping {loop_counter + 1}") + terraform_plan_code, terraform_plan_out, terraform_plan_err = self.utils.subprocess_exec("terraform plan -out " + tf_path + "/" + tf_name + ".tfplan", tf_path + "/terraform_plan.log", {"cwd": self.environment['path'] + "/terraform", "env": myenv}) + if terraform_plan_code != 0: + self.logging.error(f"Clusters with seed {tf_name} looping {loop_counter + 1} terraform plan failed") + self.logging.debug(terraform_plan_out) + return 1 + else: + self.logging.info(f"Trying to install clusters with TF template {tf_name} looping {loop_counter + 1} with {cluster_workers} workers up to 5 times using terraform provider") + trying = 0 + while trying <= 5: + if self.utils.force_terminate: + self.logging.error(f"Exiting clusters creation for {tf_name} looping {loop_counter + 1} after capturing Ctrl-C") + return 0 + trying += 1 + cluster_apply_time = int(datetime.datetime.utcnow().timestamp()) + terraform_apply_code, terraform_apply_out, terraform_apply_err = self.utils.subprocess_exec("terraform apply -state=" + tf_path + "/terraform.tfstate " + tf_path + "/" + tf_name + ".tfplan", tf_path + "/terraform_apply-" + str(trying) + ".log", {"cwd": self.environment['path'] + "/terraform", 'preexec_fn': self.utils.disable_signals, "env": myenv}) + if terraform_apply_code != 0: + self.logging.debug(terraform_apply_out) + self.logging.debug(terraform_apply_err) + if trying <= 5: + self.logging.warning(f"Try: {trying}/5. 
+                                    self.logging.warning(f"Try: {trying}/5. Clusters with seed {tf_name} looping {loop_counter + 1} installation failed, retrying in 15 seconds")
+                                    time.sleep(15)
+                                else:
+                                    self.logging.error(f"Clusters with seed {tf_name} looping {loop_counter + 1} installation failed after 5 retries")
+                                    self.logging.debug(terraform_apply_out)
+                                    self.logging.debug(terraform_apply_err)
+                                    return 1
+                            else:
+                                break
+
+                except Exception as err:
+                    self.logging.error(f"Failed to apply with cluster seed {tf_name} looping {loop_counter + 1}")
+                    self.logging.error(err)
+                    return 1
+                while tf_counter < platform.environment["clusters_per_apply"]:
+                    cluster_name = platform.environment["cluster_name_seed"] + "-" + str((loop_counter * self.environment['clusters_per_apply']) + (tf_counter + 1)).zfill(4)
+                    platform.environment["clusters"][cluster_name]["cluster_apply_time"] = cluster_apply_time
+                    tf_counter += 1
+                if platform.environment["delay_between_batch"] is None:
+                    time.sleep(1)
+                else:
+                    time.sleep(platform.environment["delay_between_batch"])
+                loop_counter += 1
+        return 0
+
+    # Uses the created templates based on clusters_per_apply
+    # and destroys them at a given interval without waiting for completion
+    def destroy_tf_template(self, platform, tf_module="cluster"):
+        loop_counter = 0
+        while loop_counter < platform.environment["clusters_per_apply_count"]:
+            self.logging.debug(platform.environment["clusters"])
+            if self.utils.force_terminate:
+                loop_counter += 1
+            else:
+                cluster_workers = int(platform.environment["workers"])
+
+                tf_name = platform.environment["cluster_name_seed"]
+
+                try:
+
+                    tf_path = platform.environment["path"] + "/" + "TF_" + tf_name + "-" + str(loop_counter * self.environment['clusters_per_apply']).zfill(4)
+                    if not os.path.exists(tf_path):
+                        os.mkdir(tf_path)
+                    myenv = os.environ.copy()
+                    myenv["TF_VAR_token"] = self.environment["ocm_token"]
+                    myenv["TF_VAR_cloud_region"] = self.environment['aws']['region']
+                    myenv["TF_VAR_url"] = self.environment["ocm_url"]
+                    myenv["TF_VAR_account_role_prefix"] = 'ManagedOpenShift'
+                    myenv["TF_VAR_cluster_name"] = tf_name
+                    myenv["TF_VAR_replicas"] = str(cluster_workers)
+                    myenv["TF_VAR_operator_role_prefix"] = tf_name + "-" + str(loop_counter)
+                    myenv["TF_VAR_clusters_per_apply"] = str(self.environment['clusters_per_apply'])
+                    myenv["TF_VAR_loop_factor"] = str((loop_counter * self.environment['clusters_per_apply']))
+
+                    if tf_module == "oidc":
+                        # additional env for oidc_provider template
+                        myenv["TF_VAR_managed"] = "true"
+
+                        self.logging.info(f"Destroying OIDC template to delete oidc_provider for cluster seed {tf_name} looping {loop_counter + 1}")
+                        terraform_oidc_destroy_code, terraform_oidc_destroy_out, terraform_oidc_destroy_err = self._oidc_tf_template("apply -destroy", tf_path, myenv)
+                        if terraform_oidc_destroy_code != 0:
+                            self.logging.error(f"OIDC with seed {tf_name} looping {loop_counter + 1} terraform destroy failed")
+                            self.logging.debug(terraform_oidc_destroy_out)
+                            return 1
+
+                    else:
+                        self.logging.info(f"Deleting Clusters with seed {tf_name} looping {loop_counter + 1} on Rosa Platform using terraform")
+                        trying = 0
+                        while trying <= 5:
+                            if self.utils.force_terminate:
+                                self.logging.error(f"Exiting clusters deletion for {tf_name} looping {loop_counter + 1} after capturing Ctrl-C")
+                                return 0
+                            trying += 1
self.utils.disable_signals, "env": myenv}) + if cleanup_code != 0: + self.logging.debug(f"Clusters Cleanup with seed {tf_name} looping {loop_counter + 1} is failed") + self.logging.debug(cleanup_out) + self.logging.debug(cleanup_err) + if trying <= 5: + self.logging.warning(f"Try: {trying}/5. Clusters with seed {tf_name} looping {loop_counter + 1} deletion failed, retrying in 15 seconds") + time.sleep(15) + else: + self.logging.error(f"Clusters with seed {tf_name} looping {loop_counter + 1} deletion failed after 5 retries") + self.logging.debug(cleanup_out) + self.logging.debug(cleanup_err) + return 1 + else: + break + + except Exception as err: + self.logging.error(f"Failed to apply with cluster seed {tf_name} looping {loop_counter + 1}") + self.logging.error(err) + return 1 + if platform.environment["delay_between_cleanup"] is None: + time.sleep(1) + else: + time.sleep(platform.environment["delay_between_cleanup"]) + loop_counter += 1 + return 0 + + # Cluster deletion will be initiated by destroy_tf_template + # this function waits and verifies the deletions + # No actual delete logic in this function unlike other subplatform + def delete_cluster(self, platform, cluster_name): + super().delete_cluster(platform, cluster_name) + retry_loop = 0 cluster_info = platform.environment["clusters"][cluster_name] - cluster_start_time = int(datetime.datetime.utcnow().timestamp()) cluster_info["uuid"] = self.environment["uuid"] cluster_info["install_method"] = "terraform" - self.logging.info(f"Deleting cluster {cluster_name} on Rosa Platform using terraform") - cleanup_code, cleanup_out, cleanup_err = self.utils.subprocess_exec("terraform apply -destroy -state=" + cluster_info['path'] + "/terraform.tfstate --auto-approve", cluster_info["path"] + "/cleanup.log", {"cwd": self.environment['path'] + "/terraform", 'preexec_fn': self.utils.disable_signals, "env": myenv}) - cluster_delete_end_time = int(datetime.datetime.utcnow().timestamp()) - if cleanup_code == 0: - self.logging.debug( - f"Confirm cluster {cluster_name} deleted by attempting to describe the cluster. This should fail if the cluster is removed." - ) - check_code, check_out, check_err = self.utils.subprocess_exec( - "rosa describe cluster -c " + cluster_name, log_output=False - ) - if check_code != 0: - cluster_info["status"] = "deleted" + cluster_info["per_template_count"] = platform.environment['clusters_per_apply'] + cluster_info["tf_count"] = platform.environment['clusters_per_apply_count'] + cluster_info["total_count"] = platform.environment['cluster_count'] + self.logging.info(f"Checking uninstall log for cluster {cluster_name}") + + while retry_loop <= 600: # 1hr timeout + retry_loop += 1 + cluster_delete_start_time = int(datetime.datetime.utcnow().timestamp()) + watch_code, watch_out, watch_err = self.utils.subprocess_exec("rosa logs uninstall -c " + cluster_name + " --watch", cluster_info["path"] + "/cleanup.log", {'preexec_fn': self.utils.disable_signals}) + if watch_code != 0: + if retry_loop <= 600: + self.logging.debug(f"ROSA cluster uninstall log for {cluster_name} is not available yet, retrying..") + self.logging.debug(watch_out) + time.sleep(6) + else: + cluster_info['status'] = "not deleted" + self.logging.debug(watch_out) + self.logging.error(watch_err) + return 1 else: - cluster_info["status"] = "not deleted" + break + + cluster_delete_end_time = int(datetime.datetime.utcnow().timestamp()) + self.logging.debug( + f"Confirm cluster {cluster_name} deleted by attempting to describe the cluster. 
+        self.logging.debug(
+            f"Confirm cluster {cluster_name} deleted by attempting to describe the cluster. This should fail if the cluster is removed."
+        )
+        check_code, check_out, check_err = self.utils.subprocess_exec(
+            "rosa describe cluster -c " + cluster_name, log_output=False
+        )
+        if check_code != 0:
+            cluster_info["status"] = "deleted"
         else:
             cluster_info["status"] = "not deleted"
+
         cluster_end_time = int(datetime.datetime.utcnow().timestamp())
-        cluster_info["destroy_duration"] = cluster_delete_end_time - cluster_start_time
-        cluster_info["destroy_all_duration"] = cluster_end_time - cluster_start_time
+        cluster_info["destroy_duration"] = cluster_delete_end_time - cluster_delete_start_time
+        cluster_info["destroy_all_duration"] = cluster_end_time - cluster_delete_start_time
         try:
             with open(cluster_info['path'] + "/metadata_destroy.json", "w") as metadata_file:
                 json.dump(cluster_info, metadata_file)
@@ -116,85 +308,82 @@ def get_workers_ready(self, kubeconfig, cluster_name):
         ready_nodes = status_list["True"] if "True" in status_list else 0
         return ready_nodes
 
+    # Cluster creation will be initiated by apply_tf_template
+    # this function waits and verifies the creation
+    # No actual create logic in this function unlike other subplatforms
     def create_cluster(self, platform, cluster_name):
         super().create_cluster(platform, cluster_name)
+        retry_loop = 0
         cluster_info = platform.environment["clusters"][cluster_name]
         cluster_info["uuid"] = self.environment["uuid"]
         cluster_info["install_method"] = "terraform"
+        cluster_info["per_template_count"] = platform.environment['clusters_per_apply']
+        cluster_info["tf_count"] = platform.environment['clusters_per_apply_count']
+        cluster_info["total_count"] = platform.environment['cluster_count']
         self.logging.info(f"Creating cluster {cluster_info['index']} on ROSA with name {cluster_name} and {cluster_info['workers']} workers")
         cluster_info["path"] = platform.environment["path"] + "/" + cluster_name
         os.mkdir(cluster_info["path"])
-        self.logging.debug("Attempting cluster installation")
         self.logging.debug("Output directory set to %s" % cluster_info["path"])
 
-        myenv = os.environ.copy()
-        myenv["TF_VAR_token"] = self.environment["ocm_token"]
-        myenv["TF_VAR_cloud_region"] = self.environment['aws']['region']
-        myenv["TF_VAR_url"] = self.environment["ocm_url"]
-        myenv["TF_VAR_account_role_prefix"] = 'ManagedOpenShift'
-        myenv["TF_VAR_cluster_name"] = cluster_name
-        myenv["TF_VAR_operator_role_prefix"] = cluster_name
-#        myenv["TF_VAR_clusters_per_apply"] = str(self.environment['clusters_per_apply'])
-
-        terraform_plan_code, terraform_plan_out, terraform_plan_err = self.utils.subprocess_exec("terraform plan -out " + cluster_info['path'] + "/" + cluster_name + ".tfplan", cluster_info["path"] + "/terraform_plan.log", {"cwd": self.environment['path'] + "/terraform", "env": myenv})
-        if terraform_plan_code != 0:
-            cluster_end_time = int(datetime.datetime.utcnow().timestamp())
-            cluster_info["status"] = "Not Installed"
-            self.logging.error(f"Cluster {cluster_name} terraform plan failed")
-            self.logging.debug(terraform_plan_out)
-            return 1
-        else:
-            self.logging.info(f"Trying to install cluster {cluster_name} with {cluster_info['workers']} workers up to 5 times using terraform provider")
-            trying = 0
-            while trying <= 5:
-                cluster_start_time = int(datetime.datetime.utcnow().timestamp())
-                if self.utils.force_terminate:
-                    self.logging.error(f"Exiting cluster creation for {cluster_name} after capturing Ctrl-C")
-                    return 0
-                trying += 1
"/terraform.tfstate " + cluster_info['path'] + "/" + cluster_name + ".tfplan", cluster_info["path"] + "/terraform_apply.log", {"cwd": self.environment['path'] + "/terraform", 'preexec_fn': self.utils.disable_signals, "env": myenv}) - if terraform_apply_code != 0: - cluster_info["install_try"] = trying - self.logging.debug(terraform_apply_out) - self.logging.debug(terraform_apply_err) - if trying <= 5: - self.logging.warning(f"Try: {trying}/5. Cluster {cluster_name} installation failed, retrying in 15 seconds") - time.sleep(15) - else: - cluster_end_time = int(datetime.datetime.utcnow().timestamp()) - cluster_info["status"] = "Not Installed" - self.logging.error(f"Cluster {cluster_name} installation failed after 5 retries") - self.logging.debug(terraform_apply_out) - self.logging.debug(terraform_apply_err) - return 1 + while retry_loop <= 60: # 10 min timeout + retry_loop += 1 + cluster_start_time = int(datetime.datetime.utcnow().timestamp()) + status_code, status_out, status_err = self.utils.subprocess_exec("rosa describe cluster -c " + cluster_name + " -o json", extra_params={"universal_newlines": True}, log_output=False) + if status_code != 0: + if retry_loop <= 60: + self.logging.debug(f"ROSA cluster {cluster_name} is not available yet, retrying..") + self.logging.debug(status_out) + time.sleep(5) else: - cluster_end_time = int(datetime.datetime.utcnow().timestamp()) - break - - cluster_info['status'] = "installed" - self.logging.info(f"Cluster {cluster_name} installation finished on the {trying} try") - cluster_info["metadata"] = self.get_metadata(cluster_name) - cluster_info["install_try"] = trying - cluster_info["install_duration"] = cluster_end_time - cluster_start_time - access_timers = self.get_cluster_admin_access(cluster_name, cluster_info["path"]) - cluster_info["kubeconfig"] = access_timers.get("kubeconfig", None) - cluster_info["cluster_admin_create"] = access_timers.get("cluster_admin_create", None) - cluster_info["cluster_admin_login"] = access_timers.get("cluster_admin_login", None) - cluster_info["cluster_oc_adm"] = access_timers.get("cluster_oc_adm", None) - if not cluster_info["kubeconfig"]: - self.logging.error(f"Failed to download kubeconfig file for cluster {cluster_name}. Disabling wait for workers and workload execution") - cluster_info["workers_wait_time"] = None - cluster_info["status"] = "Ready. 
Not Access" - return 1 - if cluster_info["workers_wait_time"]: - workers_ready = self._wait_for_workers(cluster_info["kubeconfig"], cluster_info["workers"], cluster_info["workers_wait_time"], cluster_name, "workers") - if workers_ready[1] == cluster_info["workers"]: - cluster_info["workers_ready"] = workers_ready[2] - cluster_start_time + cluster_info['status'] = "not ready" + self.logging.debug(status_out) + self.logging.error(status_err) + return 1 else: - cluster_info['workers_ready'] = None - cluster_info['status'] = "Ready, missing workers" + cluster_listed_time = int(datetime.datetime.utcnow().timestamp()) + preflight_ch = self._preflight_wait(cluster_name, cluster_name) + cluster_info["preflight_checks"] = preflight_ch + break + + watch_code, watch_out, watch_err = self.utils.subprocess_exec("rosa logs install -c " + cluster_name + " --watch", cluster_info["path"] + "/installation.log", {'preexec_fn': self.utils.disable_signals}) + status_code, status_out, status_err = self.utils.subprocess_exec("rosa describe cluster -c " + cluster_name + " -o json", extra_params={"universal_newlines": True}) + current_status = json.loads(status_out)["state"] + if watch_code != 0 or current_status != "ready": + cluster_info['status'] = "not installed" + return 1 + else: + cluster_info['status'] = "installed" + cluster_end_time = int(datetime.datetime.utcnow().timestamp()) + # Getting againg metadata to update the cluster status + cluster_info["metadata"] = self.get_metadata(cluster_name) + cluster_info["install_duration"] = cluster_end_time - cluster_start_time + access_timers = self.get_cluster_admin_access(cluster_name, cluster_info["path"]) + cluster_info["kubeconfig"] = access_timers.get("kubeconfig", None) + cluster_info["cluster_admin_create"] = access_timers.get("cluster_admin_create", None) + cluster_info["cluster_admin_login"] = access_timers.get("cluster_admin_login", None) + cluster_info["cluster_oc_adm"] = access_timers.get("cluster_oc_adm", None) + if not cluster_info["kubeconfig"]: + self.logging.error(f"Failed to download kubeconfig file for cluster {cluster_name}. Disabling wait for workers and workload execution") + cluster_info["workers_wait_time"] = None + cluster_info["status"] = "Ready. 
Not Access" return 1 - cluster_info['status'] = "ready" + if cluster_info["workers_wait_time"]: + with concurrent.futures.ThreadPoolExecutor() as wait_executor: + futures = [wait_executor.submit(self._wait_for_workers, cluster_info["kubeconfig"], cluster_info["workers"], cluster_info["workers_wait_time"], cluster_name, "workers")] + futures.append(wait_executor.submit(self._wait_for_workers, cluster_info["kubeconfig"], platform.environment["extra_machinepool"]["replicas"], cluster_info["workers_wait_time"], cluster_name, platform.environment["extra_machinepool"]["name"])) if "extra_machinepool" in platform.environment else None + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result[0] == "workers": + default_pool_workers = int(result[1]) + if default_pool_workers == cluster_info["workers"]: + cluster_info["workers_ready"] = result[2] - cluster_start_time + else: + cluster_info['workers_ready'] = None + cluster_info['status'] = "Ready, missing workers" + return 1 + cluster_info['status'] = "ready" + cluster_apply_time = int(platform.environment["clusters"][cluster_name]["cluster_apply_time"]) + cluster_info["apply_duration"] = cluster_listed_time - cluster_apply_time try: with open(cluster_info['path'] + "/metadata_install.json", "w") as metadata_file: json.dump(cluster_info, metadata_file) @@ -263,7 +452,7 @@ def __init__(self, parser, config_file, environment): # EnvDefault = self.EnvDefault parser.add_argument("--terraform-retry", type=int, default=5, help="Number of retries when executing terraform commands") -# parser.add_argument("--clusters-per-apply", type=int, default=1, help="Number of clusters to install on each terraform apply") + parser.add_argument("--clusters-per-apply", type=int, default=1, help="Number of clusters to install on each terraform apply") # parser.add_argument("--service-cluster", action=EnvDefault, env=environment, envvar="ROSA_BURNER_HYPERSHIFT_SERVICE_CLUSTER", help="Service Cluster Used to create the Hosted Clusters") if config_file: diff --git a/libs/utils.py b/libs/utils.py index 542b200..8049b2b 100644 --- a/libs/utils.py +++ b/libs/utils.py @@ -113,6 +113,9 @@ def cleanup_scheduler(self, platform): f"Waiting {platform.environment['delay_between_cleanup']} minutes before deleting the next cluster" ) time.sleep(platform.environment["delay_between_cleanup"]) + if platform.environment["subplatform"] and platform.environment["subplatform"] == "terraform": + if platform.destroy_tf_template(platform) != 0: + return 1 return delete_cluster_thread_list # To form the cluster_info dict for cleanup funtions @@ -182,7 +185,9 @@ def install_scheduler(self, platform): else: cluster_workers = int(platform.environment["workers"].split(",")[(loop_counter - 1) % len(platform.environment["workers"].split(","))]) cluster_name = platform.environment["cluster_name_seed"] + "-" + str(loop_counter).zfill(4) - platform.environment["clusters"][cluster_name] = {} + + if cluster_name not in platform.environment["clusters"]: + platform.environment["clusters"][cluster_name] = {} try: platform.environment["clusters"][cluster_name]["workers"] = cluster_workers platform.environment["clusters"][cluster_name]["workers_wait_time"] = platform.environment["workers_wait_time"] @@ -196,6 +201,10 @@ def install_scheduler(self, platform): cluster_thread_list.append(thread) thread.start() self.logging.debug("Number of alive threads %d" % threading.active_count()) + time.sleep(1) + if platform.environment["subplatform"] and 
platform.environment["subplatform"] == "terraform": + if platform.apply_tf_template(platform) != 0: + return 1 except Exception as err: self.logging.error(err) self.logging.error("Thread creation failed") diff --git a/rosa-burner.py b/rosa-burner.py index b13f094..4402581 100755 --- a/rosa-burner.py +++ b/rosa-burner.py @@ -97,6 +97,6 @@ else: raise - platform.platform_cleanup() + platform.platform_cleanup(platform) # utils.test_recap(platform)