From 3c270178ca53cd2f29d77c85a5f7ed899a120270 Mon Sep 17 00:00:00 2001 From: Anmol Sachan Date: Tue, 22 May 2018 18:07:35 +0530 Subject: [PATCH] Changed Unmanage flow - Unmanage cluster will happen per node and not via tendrl/monitor tendrl-bug-id: Tendrl/commons#841 bugzilla: 1583590 Signed-off-by: Anmol Sachan --- .../flows/unmanage_cluster/__init__.py | 245 ++++++++++++++---- .../atoms/delete_cluster_details/__init__.py | 100 ------- .../delete_monitoring_details/__init__.py | 59 ----- .../stop_integration_services/__init__.py | 93 ------- .../stop_monitoring_services/__init__.py | 93 ------- .../atoms/unmanage_cluster/__init__.py | 141 ++++++++++ tendrl/commons/objects/definition/master.yaml | 82 +++--- .../stop_integration_services/__init__.py | 58 +++++ .../stop_monitoring_services/__init__.py | 58 +++++ 9 files changed, 490 insertions(+), 439 deletions(-) delete mode 100644 tendrl/commons/objects/cluster/atoms/delete_cluster_details/__init__.py delete mode 100644 tendrl/commons/objects/cluster/atoms/delete_monitoring_details/__init__.py delete mode 100644 tendrl/commons/objects/cluster/atoms/stop_integration_services/__init__.py delete mode 100644 tendrl/commons/objects/cluster/atoms/stop_monitoring_services/__init__.py create mode 100644 tendrl/commons/objects/cluster/atoms/unmanage_cluster/__init__.py create mode 100644 tendrl/commons/objects/node/atoms/stop_integration_services/__init__.py create mode 100644 tendrl/commons/objects/node/atoms/stop_monitoring_services/__init__.py diff --git a/tendrl/commons/flows/unmanage_cluster/__init__.py b/tendrl/commons/flows/unmanage_cluster/__init__.py index ee0c4bc3..a4ee7b7b 100644 --- a/tendrl/commons/flows/unmanage_cluster/__init__.py +++ b/tendrl/commons/flows/unmanage_cluster/__init__.py @@ -1,6 +1,13 @@ +import etcd +import time +import uuid + from tendrl.commons import flows + from tendrl.commons.flows.exceptions import FlowExecutionFailedError from tendrl.commons.objects import AtomExecutionFailedError +from tendrl.commons.utils import etcd_utils +from tendrl.commons.utils import log_utils as logger class UnmanageCluster(flows.BaseFlow): @@ -14,59 +21,203 @@ def run(self): ).load() if _cluster.is_managed == "no": if _cluster.current_job['job_name'] == self.__class__.__name__ \ - and _cluster.current_job['status'] == 'finished': + and _cluster.current_job['status'] == 'finished': + raise FlowExecutionFailedError( + "Cluster is already in un-managed state" + ) + if (_cluster.status is not None and + _cluster.status != "" and + _cluster.current_job['status'] == 'in_progress' and + _cluster.status in + ["importing", "unmanaging", "expanding"]): + # Checking if the cluster is being unmanaged by the parent job + _job = NS.tendrl.objects.Job(job_id=self.job_id).load() + if 'parent' in _job.payload: + if _job.payload['parent'] != _cluster.locked_by['job_id']: raise FlowExecutionFailedError( - "Cluster is already in un-managed state" + "Another job in progress for cluster, " + "please wait till the job finishes (job_id: %s) " + "(integration_id: %s) " % ( + _cluster.current_job['job_id'], + _cluster.integration_id + ) + ) + else: + raise FlowExecutionFailedError( + "Another job in progress for cluster, please wait till " + "the job finishes (job_id: %s) (integration_id: %s) " % ( + _cluster.current_job['job_id'], + _cluster.integration_id ) - if _cluster.current_job['status'] == 'in_progress' and \ - ( - 'job_id' in _cluster.locked_by and - _cluster.locked_by['job_id'] != "" - ) and ( - _cluster.status in ['importing', 'unmanaging', 
'expanding'] - ): - raise FlowExecutionFailedError( - "Another job in progress for cluster." - " Please wait till the job finishes " - "(job_id: %s) (integration_id: %s) " % - ( - _cluster.current_job['job_id'], - _cluster.integration_id ) - ) - - _lock_details = { - 'node_id': NS.node_context.node_id, - 'fqdn': NS.node_context.fqdn, - 'tags': NS.node_context.tags, - 'type': NS.type, - 'job_name': self.__class__.__name__, - 'job_id': self.job_id - } - _cluster.locked_by = _lock_details - _cluster.status = "unmanaging" - _cluster.current_job = { - 'job_id': self.job_id, - 'job_name': self.__class__.__name__, - 'status': "in_progress" - } - _cluster.save() try: + if 'Node[]' not in self.parameters: + _lock_details = { + 'node_id': NS.node_context.node_id, + 'fqdn': NS.node_context.fqdn, + 'tags': NS.node_context.tags, + 'type': NS.type, + 'job_name': self.__class__.__name__, + 'job_id': self.job_id + } + _cluster.is_managed = "no" + _cluster.locked_by = _lock_details + _cluster.status = "unmanaging" + _cluster.current_job = { + 'job_id': self.job_id, + 'job_name': self.__class__.__name__, + 'status': "in_progress" + } + _cluster.save() + super(UnmanageCluster, self).run() - _cluster = NS.tendrl.objects.Cluster( - integration_id=integration_id - ).load() - _cluster.status = "" - _cluster.is_managed = "no" - _cluster.locked_by = {} - _cluster.errors = [] - _cluster.current_job = { - 'status': "finished", - 'job_name': self.__class__.__name__, - 'job_id': self.job_id - } - _cluster.save() + + # Below code to be executed on the parent node. + + _job = NS.tendrl.objects.Job(job_id=self.job_id).load() + if 'parent' not in _job.payload: + # Creating job to delete monitoring details. + _job_id = str(uuid.uuid4()) + payload = { + "tags": ["tendrl/integration/monitoring"], + "run": "monitoring.flows.DeleteMonitoringData", + "status": "new", + "parameters": self.parameters, + "parent": self.parameters['job_id'], + "type": "monitoring" + } + NS.tendrl.objects.Job( + job_id=_job_id, + status="new", + payload=payload + ).save() + + # Wait for 2 mins for the job to complete + loop_count = 0 + wait_count = 24 + while True: + if loop_count >= wait_count: + logger.log( + "error", + NS.publisher_id, + { + "message": "Clearing monitoring data for " + "cluster (%s) not yet complete. " + "Timing out." 
% + NS.tendrl.objects.Cluster( + integration_id=integration_id + ).load().short_name + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + return False + time.sleep(5) + finished = True + job = NS.tendrl.objects.Job(job_id=_job_id).load() + if job.status != "finished": + finished = False + if finished: + break + else: + loop_count += 1 + continue + + # Deleting cluster details + etcd_keys_to_delete = [] + etcd_keys_to_delete.append( + "/clusters/%s/nodes" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/Bricks" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/Volumes" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/GlobalDetails" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/TendrlContext" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/Utilization" % integration_id + ) + etcd_keys_to_delete.append( + "/clusters/%s/raw_map" % integration_id + ) + etcd_keys_to_delete.append( + "/alerting/clusters/%s" % integration_id + ) + nodes = etcd_utils.read( + "/clusters/%s/nodes" % integration_id + ) + node_ids = [] + for node in nodes.leaves: + node_id = node.key.split("/")[-1] + node_ids.append(node_id) + key = "/alerting/nodes/%s" % node_id + etcd_keys_to_delete.append( + key + ) + try: + # delete node alerts from /alerting/alerts + node_alerts = etcd_utils.read(key) + for node_alert in node_alerts.leaves: + etcd_keys_to_delete.append( + "/alerting/alerts/%s" % node_alert.key.split( + "/")[-1] + ) + except etcd.EtcdKeyNotFound: + # No node alerts, continue + pass + + # Find the alerting/alerts entries to be deleted + try: + cluster_alert_ids = etcd_utils.read( + "/alerting/clusters/%s" % integration_id + ) + for entry in cluster_alert_ids.leaves: + ca_id = entry.key.split("/")[-1] + etcd_keys_to_delete.append( + "/alerting/alerts/%s" % ca_id + ) + except etcd.EtcdKeyNotFound: + # No cluster alerts, continue + pass + + # Remove the cluster details + for key in list(set(etcd_keys_to_delete)): + try: + etcd_utils.delete(key, recursive=True) + except etcd.EtcdKeyNotFound: + logger.log( + "debug", + NS.publisher_id, + { + "message": "%s key not found for deletion" % + key + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + continue + # remove short name + _cluster = NS.tendrl.objects.Cluster( + integration_id=integration_id + ).load() + _cluster.short_name = "" + _cluster.status = "" + _cluster.is_managed = "no" + _cluster.locked_by = {} + _cluster.errors = [] + _cluster.current_job = { + 'status': "finished", + 'job_name': self.__class__.__name__, + 'job_id': self.job_id + } + _cluster.save() except (FlowExecutionFailedError, AtomExecutionFailedError, Exception) as ex: diff --git a/tendrl/commons/objects/cluster/atoms/delete_cluster_details/__init__.py b/tendrl/commons/objects/cluster/atoms/delete_cluster_details/__init__.py deleted file mode 100644 index 4bb4be1e..00000000 --- a/tendrl/commons/objects/cluster/atoms/delete_cluster_details/__init__.py +++ /dev/null @@ -1,100 +0,0 @@ -import etcd - -from tendrl.commons import objects -from tendrl.commons.utils import etcd_utils -from tendrl.commons.utils import log_utils as logger - - -class DeleteClusterDetails(objects.BaseAtom): - def __init__(self, *args, **kwargs): - super(DeleteClusterDetails, self).__init__(*args, **kwargs) - - def run(self): - integration_id = self.parameters['TendrlContext.integration_id'] - - etcd_keys_to_delete = [] - etcd_keys_to_delete.append( - "/clusters/%s/nodes" % 
integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/Bricks" % integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/Volumes" % integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/GlobalDetails" % integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/TendrlContext" % integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/Utilization" % integration_id - ) - etcd_keys_to_delete.append( - "/clusters/%s/raw_map" % integration_id - ) - etcd_keys_to_delete.append( - "/alerting/clusters/%s" % integration_id - ) - nodes = etcd_utils.read( - "/clusters/%s/nodes" % integration_id - ) - node_ids = [] - for node in nodes.leaves: - node_id = node.key.split("/")[-1] - node_ids.append(node_id) - key = "/alerting/nodes/%s" % node_id - etcd_keys_to_delete.append( - key - ) - try: - # delete node alerts from /alerting/alerts - node_alerts = etcd_utils.read(key) - for node_alert in node_alerts.leaves: - etcd_keys_to_delete.append( - "/alerting/alerts/%s" % node_alert.key.split( - "/")[-1] - ) - except etcd.EtcdKeyNotFound: - # No node alerts, continue - pass - - # Find the alerting/alerts entries to be deleted - try: - cluster_alert_ids = etcd_utils.read( - "/alerting/clusters/%s" % integration_id - ) - for entry in cluster_alert_ids.leaves: - ca_id = entry.key.split("/")[-1] - etcd_keys_to_delete.append( - "/alerting/alerts/%s" % ca_id - ) - except etcd.EtcdKeyNotFound: - # No cluster alerts, continue - pass - - # Remove the cluster details - for key in list(set(etcd_keys_to_delete)): - try: - etcd_utils.delete(key, recursive=True) - except etcd.EtcdKeyNotFound: - logger.log( - "debug", - NS.publisher_id, - { - "message": "%s key not found for deletion" % - key - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - continue - # remove short name - cluster = NS.tendrl.objects.Cluster( - integration_id=integration_id - ).load() - cluster.short_name = "" - cluster.save() - - return True diff --git a/tendrl/commons/objects/cluster/atoms/delete_monitoring_details/__init__.py b/tendrl/commons/objects/cluster/atoms/delete_monitoring_details/__init__.py deleted file mode 100644 index ebe329ec..00000000 --- a/tendrl/commons/objects/cluster/atoms/delete_monitoring_details/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -import time -import uuid - -from tendrl.commons import objects -from tendrl.commons.utils import log_utils as logger - - -class DeleteMonitoringDetails(objects.BaseAtom): - def __init__(self, *args, **kwargs): - super(DeleteMonitoringDetails, self).__init__(*args, **kwargs) - - def run(self): - integration_id = self.parameters['TendrlContext.integration_id'] - _job_id = str(uuid.uuid4()) - payload = { - "tags": ["tendrl/integration/monitoring"], - "run": "monitoring.flows.DeleteMonitoringData", - "status": "new", - "parameters": self.parameters, - "parent": self.parameters['job_id'], - "type": "monitoring" - } - NS.tendrl.objects.Job( - job_id=_job_id, - status="new", - payload=payload - ).save() - - # Wait for 2 mins for the job to complete - loop_count = 0 - wait_count = 24 - while True: - if loop_count >= wait_count: - logger.log( - "error", - NS.publisher_id, - { - "message": "Clearing monitoring data for cluster " - "(%s) not yet complete. Timing out." 
% - NS.tendrl.objects.Cluster( - integration_id=integration_id - ).load().short_name - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - return False - time.sleep(5) - finished = True - job = NS.tendrl.objects.Job(job_id=_job_id).load() - if job.status != "finished": - finished = False - if finished: - break - else: - loop_count += 1 - continue - - return True diff --git a/tendrl/commons/objects/cluster/atoms/stop_integration_services/__init__.py b/tendrl/commons/objects/cluster/atoms/stop_integration_services/__init__.py deleted file mode 100644 index cc225b78..00000000 --- a/tendrl/commons/objects/cluster/atoms/stop_integration_services/__init__.py +++ /dev/null @@ -1,93 +0,0 @@ -import etcd -import time -import uuid - -from tendrl.commons import objects -from tendrl.commons.utils import etcd_utils -from tendrl.commons.utils import log_utils as logger - - -class StopIntegrationServices(objects.BaseAtom): - def __init__(self, *args, **kwargs): - super(StopIntegrationServices, self).__init__(*args, **kwargs) - - def run(self): - integration_id = self.parameters['TendrlContext.integration_id'] - _cluster = NS.tendrl.objects.Cluster( - integration_id=integration_id - ).load() - - try: - # Get the cluster nodes - nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id) - child_job_ids = [] - node_ids = [] - for node in nodes.leaves: - node_id = node.key.split("/")[-1] - node_ids.append(node_id) - # Create jobs on nodes for stoping services - _job_id = str(uuid.uuid4()) - params = { - "Services[]": ["tendrl-gluster-integration"] - } - payload = { - "tags": ["tendrl/node_%s" % node_id], - "run": "tendrl.objects.Node.flows.StopServices", - "status": "new", - "parameters": params, - "parent": self.parameters["job_id"], - "type": "node" - } - NS.tendrl.objects.Job( - job_id=_job_id, - status="new", - payload=payload - ).save() - child_job_ids.append(_job_id) - logger.log( - "info", - NS.publisher_id, - { - "message": "Stop tendrl services (job: %s) " - "on %s in cluster %s" % - (_job_id, node_id, _cluster.short_name) - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - - # Wait for (no of nodes) * 10 secs for stop service job to complete - loop_count = 0 - wait_count = (len(child_job_ids)) * 2 - while True: - if loop_count >= wait_count: - logger.log( - "info", - NS.publisher_id, - { - "message": "Stop service jobs on cluster(%s) not " - "yet complete on all nodes(%s). Timing out. 
" - % (_cluster.short_name, str(node_ids)) - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - return False - time.sleep(5) - finished = True - for child_job_id in child_job_ids: - child_job = NS.tendrl.objects.Job( - job_id=child_job_id - ).load() - if child_job.status != "finished": - finished = False - break - if finished: - break - else: - loop_count += 1 - continue - except etcd.EtcdKeyNotFound: - pass - - return True diff --git a/tendrl/commons/objects/cluster/atoms/stop_monitoring_services/__init__.py b/tendrl/commons/objects/cluster/atoms/stop_monitoring_services/__init__.py deleted file mode 100644 index 409993f8..00000000 --- a/tendrl/commons/objects/cluster/atoms/stop_monitoring_services/__init__.py +++ /dev/null @@ -1,93 +0,0 @@ -import etcd -import time -import uuid - -from tendrl.commons import objects -from tendrl.commons.utils import etcd_utils -from tendrl.commons.utils import log_utils as logger - - -class StopMonitoringServices(objects.BaseAtom): - def __init__(self, *args, **kwargs): - super(StopMonitoringServices, self).__init__(*args, **kwargs) - - def run(self): - integration_id = self.parameters['TendrlContext.integration_id'] - _cluster = NS.tendrl.objects.Cluster( - integration_id=integration_id - ).load() - - try: - # Get the cluster nodes - nodes = etcd_utils.read("/clusters/%s/nodes" % integration_id) - child_job_ids = [] - node_ids = [] - for node in nodes.leaves: - node_id = node.key.split("/")[-1] - node_ids.append(node_id) - # Create jobs on nodes for stoping services - _job_id = str(uuid.uuid4()) - params = { - "Services[]": ["collectd"] - } - payload = { - "tags": ["tendrl/node_%s" % node_id], - "run": "tendrl.objects.Node.flows.StopServices", - "status": "new", - "parameters": params, - "parent": self.parameters["job_id"], - "type": "node" - } - NS.tendrl.objects.Job( - job_id=_job_id, - status="new", - payload=payload - ).save() - child_job_ids.append(_job_id) - logger.log( - "info", - NS.publisher_id, - { - "message": "Stop tendrl services (job: %s) " - "on host %s in cluster %s" % - (_job_id, node_id, _cluster.short_name) - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - - loop_count = 0 - # one minute for each job - wait_count = (len(child_job_ids)) * 12 - while True: - if loop_count >= wait_count: - logger.log( - "info", - NS.publisher_id, - { - "message": "Stop service jobs on cluster(%s) not " - "yet complete on all nodes(%s). Timing out. 
" - % (_cluster.short_name, str(node_ids)) - }, - job_id=self.parameters['job_id'], - flow_id=self.parameters['flow_id'], - ) - return False - time.sleep(5) - finished = True - for child_job_id in child_job_ids: - child_job = NS.tendrl.objects.Job( - job_id=child_job_id - ).load() - if child_job.status != "finished": - finished = False - break - if finished: - break - else: - loop_count += 1 - continue - except etcd.EtcdKeyNotFound: - pass - - return True diff --git a/tendrl/commons/objects/cluster/atoms/unmanage_cluster/__init__.py b/tendrl/commons/objects/cluster/atoms/unmanage_cluster/__init__.py new file mode 100644 index 00000000..d5ec1bdf --- /dev/null +++ b/tendrl/commons/objects/cluster/atoms/unmanage_cluster/__init__.py @@ -0,0 +1,141 @@ +import etcd +import json +import time +import uuid + +from tendrl.commons import objects + +from tendrl.commons.event import Event +from tendrl.commons.flows import utils as flow_utils +from tendrl.commons.message import ExceptionMessage +from tendrl.commons.objects import AtomExecutionFailedError +from tendrl.commons.utils import etcd_utils +from tendrl.commons.utils import log_utils as logger + + +class UnmanageCluster(objects.BaseAtom): + def __init__(self, *args, **kwargs): + super(UnmanageCluster, self).__init__(*args, **kwargs) + + def run(self): + try: + integration_id = self.parameters['TendrlContext.integration_id'] + _cluster = NS.tendrl.objects.Cluster( + integration_id=integration_id + ).load() + + if 'Node[]' not in self.parameters: + try: + integration_id_index_key = \ + "indexes/tags/tendrl/integration/%s" % integration_id + _node_ids = etcd_utils.read( + integration_id_index_key).value + self.parameters["Node[]"] = json.loads(_node_ids) + + except etcd.EtcdKeyNotFound: + _cluster = NS.tendrl.objects.Cluster( + integration_id=NS.tendrl_context.integration_id).load() + _cluster.status = "" + _cluster.current_job['status'] = 'failed' + _cluster.save() + raise AtomExecutionFailedError( + "Could not execute UnmanageCluster atom - " + "Could not load Nodelist from etcd") + + node_list = self.parameters['Node[]'] + if len(node_list) > 1: + # This is the master node for this flow + # Lock nodes + flow_utils.acquire_node_lock(self.parameters) + NS.tendrl_context = NS.tendrl_context.load() + # Creating child jobs for nodes to unamange themselves + for node in node_list: + if NS.node_context.node_id != node: + new_params = self.parameters.copy() + new_params['Node[]'] = [node] + # create same flow for each node in node list except + # $this + payload = {"tags": ["tendrl/node_%s" % node], + "run": "tendrl.flows.UnmanageCluster", + "status": "new", + "parameters": new_params, + "parent": self.parameters['job_id'], + "type": "node" + } + _job_id = str(uuid.uuid4()) + NS.tendrl.objects.Job( + job_id=_job_id, + status="new", + payload=payload + ).save() + logger.log( + "info", + NS.publisher_id, + {"message": "UnmanageCluster %s (jobID: %s) :" + "removing host %s" % + (_cluster.short_name, _job_id, node)}, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'] + ) + logger.log( + "info", + NS.publisher_id, + {"message": "UnmanageCluster %s waiting for hosts %s " + "to be unmanaged" + % (_cluster.short_name, node_list)}, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'] + ) + loop_count = 0 + # Wait for (no of nodes) * 6 minutes for unmanage to complete + wait_count = (len(node_list) - 1) * 36 + while True: + parent_job = NS.tendrl.objects.Job( + job_id=self.parameters['job_id'] + ).load() + if loop_count 
>= wait_count: + logger.log( + "info", + NS.publisher_id, + {"message": "Unmanage jobs on cluster(%s) not yet " + "complete on all nodes(%s). " + "Timing out." % + (_cluster.short_name, str(node_list)) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'] + ) + return False + time.sleep(10) + finished = True + for child_job_id in parent_job.children: + child_job = NS.tendrl.objects.Job( + job_id=child_job_id + ).load() + if child_job.status != "finished": + finished = False + break + if finished: + break + else: + loop_count += 1 + continue + + except Exception as ex: + # For traceback + Event( + ExceptionMessage( + priority="error", + publisher=NS.publisher_id, + payload={ + "message": ex.message, + "exception": ex + } + ) + ) + # raising exception to mark job as failed + raise ex + finally: + # release lock + flow_utils.release_node_lock(self.parameters) + return True diff --git a/tendrl/commons/objects/definition/master.yaml b/tendrl/commons/objects/definition/master.yaml index d284710b..1a64c478 100644 --- a/tendrl/commons/objects/definition/master.yaml +++ b/tendrl/commons/objects/definition/master.yaml @@ -82,13 +82,11 @@ namespace.tendrl: version: 1 UnmanageCluster: tags: - - "tendrl/monitor" + - "tendrl/integration/$TendrlContext.integration_id" atoms: - - tendrl.objects.Cluster.atoms.SetClusterUnmanaged - - tendrl.objects.Cluster.atoms.StopMonitoringServices - - tendrl.objects.Cluster.atoms.StopIntegrationServices - - tendrl.objects.Cluster.atoms.DeleteMonitoringDetails - - tendrl.objects.Cluster.atoms.DeleteClusterDetails + - tendrl.objects.Node.atoms.StopMonitoringServices + - tendrl.objects.Node.atoms.StopIntegrationServices + - tendrl.objects.Cluster.atoms.UnmanageCluster help: "Unmanage a Gluster Cluster" enabled: true inputs: @@ -136,46 +134,6 @@ namespace.tendrl: type: Update uuid: 333c3333-3c33-33c3-333c-c33cc34444cc help: setup cluster alias in graphite - StopMonitoringServices: - enabled: true - inputs: - mandatory: - - TendrlContext.integration_id - name: stop node services - run: tendrl.objects.Cluster.atoms.StopNodeServices - type: Update - uuid: 333c3333-3c33-33c3-333c-c33cc3c4444c - help: Stop node services - StopIntegrationServices: - enabled: true - inputs: - mandatory: - - TendrlContext.integration_id - name: stop node services - run: tendrl.objects.Cluster.atoms.StopNodeServices - type: Update - uuid: 333c3333-3c33-33c3-333c-c33cc3c4444d - help: Stop node services - DeleteClusterDetails: - enabled: True - inputs: - mandatory: - - TendrlContext.integration_id - name: clear cluster details - run: tendrl.objects.Cluster.atoms.ClearClusterDetails - type: Update - uuid: 333c3333-3c33-33c3-333c-c33cc3c5555c - help: Clear cluster details - DeleteMonitoringDetails: - enabled: True - inputs: - mandatory: - - TendrlContext.integration_id - name: Clear monitoring data - run: tendrl.objects.Cluster.atoms.ClearMonitoringData - type: Update - uuid: 333c3333-3c33-33c3-333c-c33cc3c6666c - help: Clear monitoring data SetClusterUnmanaged: enabled: True inputs: @@ -234,6 +192,16 @@ namespace.tendrl: type: Create uuid: 452f6190-9b37-11e6-950d-a24fc0d9649c help: Import the cluster + UnmanageCluster: + enabled: true + inputs: + mandatory: + - TendrlContext.integration_id + name: unmanage cluster + run: tendrl.objects.Cluster.atoms.UnmanageCluster + type: Update + uuid: 452f6190-9b37-11e6-960d-a25fc0d9649c + help: Unmanage the cluster CheckClusterAvailable: enabled: true inputs: @@ -926,6 +894,26 @@ namespace.tendrl: type: check uuid: 
2f94a48a-05d7-408c-b400-e27827f4edca version: 1 + StopMonitoringServices: + enabled: true + inputs: + mandatory: + - TendrlContext.integration_id + name: stop node services + run: tendrl.objects.Node.atoms.StopMonitoringServices + type: Update + uuid: 333c3333-3c33-33c3-333c-c76cc3c4444c + help: Stop node services + StopIntegrationServices: + enabled: true + inputs: + mandatory: + - TendrlContext.integration_id + name: stop node services + run: tendrl.objects.Node.atoms.StopIntegrationServices + type: Update + uuid: 333c3333-3c33-33c3-333c-c77cc3c4444d + help: Stop node services Cmd: enabled: true inputs: @@ -1132,7 +1120,7 @@ namespace.tendrl: help: "ID of child jobs created by this job" type: List output: - help: output of the job execution + help: output of the job executionStopMonitoringServices type: dict timeout: help: Job is timed out after 10 mins if yes diff --git a/tendrl/commons/objects/node/atoms/stop_integration_services/__init__.py b/tendrl/commons/objects/node/atoms/stop_integration_services/__init__.py new file mode 100644 index 00000000..b0a79cdc --- /dev/null +++ b/tendrl/commons/objects/node/atoms/stop_integration_services/__init__.py @@ -0,0 +1,58 @@ +from tendrl.commons import objects +from tendrl.commons.utils import cmd_utils +from tendrl.commons.utils import log_utils as logger + + +class StopIntegrationServices(objects.BaseAtom): + def __init__(self, *args, **kwargs): + super(StopIntegrationServices, self).__init__(*args, **kwargs) + + def run(self): + services = ["tendrl-gluster-integration"] + for service in services: + srv = NS.tendrl.objects.Service(service=service) + if not srv.running: + logger.log( + "debug", + NS.publisher_id, + { + "message": "%s not running on " + "%s" % (service, NS.node_context.fqdn) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + continue + + _cmd_str = "systemctl stop %s" % service + cmd = cmd_utils.Command(_cmd_str) + err, out, rc = cmd.run() + if err: + logger.log( + "error", + NS.publisher_id, + { + "message": "Could not stop %s" + " service. Error: %s" % (service, err) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + return False + + _cmd_str = "systemctl disable %s" % service + cmd = cmd_utils.Command(_cmd_str) + err, out, rc = cmd.run() + if err: + logger.log( + "error", + NS.publisher_id, + { + "message": "Could not disable %s" + " service. 
Error: %s" % (service, err) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + return False + return True diff --git a/tendrl/commons/objects/node/atoms/stop_monitoring_services/__init__.py b/tendrl/commons/objects/node/atoms/stop_monitoring_services/__init__.py new file mode 100644 index 00000000..d3114dca --- /dev/null +++ b/tendrl/commons/objects/node/atoms/stop_monitoring_services/__init__.py @@ -0,0 +1,58 @@ +from tendrl.commons import objects +from tendrl.commons.utils import cmd_utils +from tendrl.commons.utils import log_utils as logger + + +class StopMonitoringServices(objects.BaseAtom): + def __init__(self, *args, **kwargs): + super(StopMonitoringServices, self).__init__(*args, **kwargs) + + def run(self): + services = ["collectd"] + for service in services: + srv = NS.tendrl.objects.Service(service=service) + if not srv.running: + logger.log( + "debug", + NS.publisher_id, + { + "message": "%s not running on " + "%s" % (service, NS.node_context.fqdn) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + continue + + _cmd_str = "systemctl stop %s" % service + cmd = cmd_utils.Command(_cmd_str) + err, out, rc = cmd.run() + if err: + logger.log( + "error", + NS.publisher_id, + { + "message": "Could not stop %s" + " service. Error: %s" % (service, err) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + return False + + _cmd_str = "systemctl disable %s" % service + cmd = cmd_utils.Command(_cmd_str) + err, out, rc = cmd.run() + if err: + logger.log( + "error", + NS.publisher_id, + { + "message": "Could not disable %s" + " service. Error: %s" % (service, err) + }, + job_id=self.parameters['job_id'], + flow_id=self.parameters['flow_id'], + ) + return False + return True
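
Reviewer note (not part of the patch): the core of this rework is that Cluster.atoms.UnmanageCluster now fans the unmanage work out as one child job per peer node (tagged tendrl/node_<node_id>, running tendrl.flows.UnmanageCluster, with "parent" set to the current job) and then polls until every child reports "finished" or a timeout expires. The sketch below only restates that fan-out-and-poll pattern in a standalone form; JOB_STORE, create_job, fan_out_unmanage and wait_for_children are illustrative names, not tendrl APIs, and the in-memory dict stands in for the etcd-backed Job objects so the snippet runs on its own.

    import time
    import uuid

    # In-memory stand-in for the etcd-backed Job objects used by tendrl (illustrative only).
    JOB_STORE = {}


    def create_job(tags, run, parameters, parent, job_type="node"):
        """Record a child job, mirroring the payload the atom builds per peer node."""
        job_id = str(uuid.uuid4())
        JOB_STORE[job_id] = {
            "status": "new",
            "payload": {
                "tags": tags,
                "run": run,
                "status": "new",
                "parameters": parameters,
                "parent": parent,
                "type": job_type,
            },
        }
        return job_id


    def fan_out_unmanage(parent_job_id, this_node_id, node_list, parameters):
        """Create one UnmanageCluster child job per peer node, skipping the local node."""
        child_ids = []
        for node_id in node_list:
            if node_id == this_node_id:
                continue
            params = dict(parameters)
            params["Node[]"] = [node_id]
            child_ids.append(create_job(
                tags=["tendrl/node_%s" % node_id],
                run="tendrl.flows.UnmanageCluster",
                parameters=params,
                parent=parent_job_id,
            ))
        return child_ids


    def wait_for_children(child_ids, per_node_timeout=360, poll_interval=10):
        """Poll the child jobs until all report "finished", or give up after the timeout."""
        if not child_ids:
            return True
        max_loops = len(child_ids) * per_node_timeout // poll_interval
        for _ in range(max_loops):
            if all(JOB_STORE[job_id]["status"] == "finished" for job_id in child_ids):
                return True
            time.sleep(poll_interval)
        return False

In the patch itself the same pattern appears as NS.tendrl.objects.Job(job_id=_job_id, status="new", payload=payload).save() per peer node, followed by a loop over parent_job.children with wait_count = (len(node_list) - 1) * 36 and time.sleep(10), i.e. roughly six minutes of polling budget per peer node. Moving the flow's tag from tendrl/monitor to tendrl/integration/$TendrlContext.integration_id is what lets this work run on the cluster's own nodes instead of the central monitor node, as the subject line states.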