diff --git a/ocw/lib/EC2.py b/ocw/lib/EC2.py
index dff87f65..5c145eb5 100644
--- a/ocw/lib/EC2.py
+++ b/ocw/lib/EC2.py
@@ -4,7 +4,7 @@
 import boto3
 from botocore.exceptions import ClientError
 import re
-from datetime import date, datetime, timedelta
+from datetime import date, datetime, timedelta, timezone
 from ocw.lib.emailnotify import send_mail
 import traceback
 import time
@@ -21,6 +21,10 @@ def __init__(self, namespace: str):
             self.all_regions = ConfigFile().getList('default/ec2_regions')
         else:
             self.all_regions = self.get_all_regions()
+        if PCWConfig.has('clusters/ec2_regions'):
+            self.cluster_regions = ConfigFile().getList('clusters/ec2_regions')
+        else:
+            self.cluster_regions = self.get_all_regions()
 
     def __new__(cls, vault_namespace):
         if vault_namespace not in EC2.__instances:
@@ -70,11 +74,7 @@ def eks_client(self, region):
 
     def all_clusters(self):
         clusters = dict()
-        if PCWConfig.has('clusters/ec2_regions'):
-            cluster_regions = ConfigFile().getList('clusters/ec2_regions')
-        else:
-            cluster_regions = self.get_all_regions()
-        for region in cluster_regions:
+        for region in self.cluster_regions:
             response = self.eks_client(region).list_clusters()
             if len(response['clusters']):
                 clusters[region] = response['clusters']
@@ -163,6 +163,64 @@ def delete_instance(self, region, instance_id):
             else:
                 raise ex
 
+    def wait_for_empty_nodegroup_list(self, region, clusterName, timeout_minutes=20):
+        """Poll EKS until the cluster has no nodegroups left or the timeout expires.
+
+        :param region: region passed to eks_client() to reach the cluster
+        :param clusterName: name of the EKS cluster whose nodegroups are polled
+        :param timeout_minutes: upper bound for the whole wait, in minutes
+        :return: True if the nodegroup list is empty (or in dry-run mode, where
+                 nothing is polled), False if nodegroups remain after the timeout
+        """
+        if self.dry_run:
+            self.log_info("Skip waiting due to dry-run mode")
+            return True
+        self.log_info("Waiting empty nodegroup list in {}", clusterName)
+        end = datetime.now(timezone.utc) + timedelta(minutes=timeout_minutes)
+        resp_nodegroup = self.eks_client(region).list_nodegroups(clusterName=clusterName)
+
+        while datetime.now(timezone.utc) < end and len(resp_nodegroup['nodegroups']) > 0:
+            time.sleep(20)
+            resp_nodegroup = self.eks_client(region).list_nodegroups(clusterName=clusterName)
+            if len(resp_nodegroup['nodegroups']) > 0:
+                self.log_info("Still waiting for {} nodegroups to disappear", len(resp_nodegroup['nodegroups']))
+        # Report the final state so callers can tell a timeout from success
+        return len(resp_nodegroup['nodegroups']) == 0
+
+    def delete_all_clusters(self):
+        """Delete every EKS cluster found in self.cluster_regions, nodegroups first.
+
+        For each cluster its nodegroups are deleted and waited for before the
+        cluster itself is deleted. A cluster whose nodegroups are still present
+        after the wait is skipped, so that one stuck cluster does not abort the
+        cleanup of the remaining clusters and regions.
+        In dry-run mode nothing is deleted; the skipped actions are only logged.
+        """
+        self.log_info("Deleting all clusters!")
+        for region in self.cluster_regions:
+            response = self.eks_client(region).list_clusters()
+            if len(response['clusters']):
+                self.log_info("Found {} cluster(s) in {}", len(response['clusters']), region)
+                for cluster in response['clusters']:
+                    resp_nodegroup = self.eks_client(region).list_nodegroups(clusterName=cluster)
+                    if len(resp_nodegroup['nodegroups']):
+                        self.log_info("Found {} nodegroups for {}", len(resp_nodegroup['nodegroups']), cluster)
+                        for nodegroup in resp_nodegroup['nodegroups']:
+                            if self.dry_run:
+                                self.log_info("Skipping {} nodegroup deletion due to dry-run mode", nodegroup)
+                            else:
+                                self.log_info("Deleting {}", nodegroup)
+                                self.eks_client(region).delete_nodegroup(
+                                    clusterName=cluster, nodegroupName=nodegroup)
+                        if not self.wait_for_empty_nodegroup_list(region, cluster):
+                            self.log_info("Nodegroups of {} are still present, skipping cluster deletion", cluster)
+                            continue
+                    if self.dry_run:
+                        self.log_info("Skipping {} cluster deletion due to dry-run mode", cluster)
+                    else:
+                        self.log_info("Finally deleting {} cluster", cluster)
+                        self.eks_client(region).delete_cluster(name=cluster)
+
     def parse_image_name(self, img_name):
         regexes = [
             # openqa-SLES12-SP5-EC2.x86_64-0.9.1-BYOS-Build1.55.raw.xz
diff --git a/ocw/management/commands/rmclusters.py b/ocw/management/commands/rmclusters.py
new file mode 100644
index 00000000..f966406f
--- /dev/null
+++ b/ocw/management/commands/rmclusters.py
@@ -0,0 +1,12 @@
+from django.core.management.base import BaseCommand
+from webui.settings import PCWConfig
+from ocw.lib.EC2 import EC2
+
+
+class Command(BaseCommand):
+    help = 'Delete all EKS clusters and their nodegroups in all namespaces configured for clusters in pcw.ini'
+
+    def handle(self, *args, **options):
+        """Call EC2.delete_all_clusters() for each namespace from PCWConfig.get_namespaces_for('clusters')."""
+        for namespace in PCWConfig.get_namespaces_for('clusters'):
+            EC2(namespace).delete_all_clusters()