diff --git a/gitops_server/workers/deployer/deploy.py b/gitops_server/workers/deployer/deploy.py index a71fc9d..348d6c8 100644 --- a/gitops_server/workers/deployer/deploy.py +++ b/gitops_server/workers/deployer/deploy.py @@ -3,6 +3,7 @@ import logging import os import tempfile +import time import uuid from opentelemetry import trace @@ -40,7 +41,7 @@ async def post_init_summary(source, username, added_apps, updated_apps, removed_ @tracer.start_as_current_span("post_result") async def post_result(app: App, result: UpdateAppResult, deployer: "Deployer", **kwargs): if result["exit_code"] != 0: - deploy_result = await handle_failed_deploy(app, result, deployer) + deploy_result = await handle_failed_deploy(app, result, deployer, **kwargs) message = ( deploy_result["slack_message"] or f"Failed to deploy app `{result['app_name']}` for cluster `{settings.CLUSTER_NAME}`:\n>>>{result['output']}" @@ -170,6 +171,7 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None: async with self.semaphore: logger.info(f"Deploying app {app.name!r}.") + from_timestamp = time.time() if app.chart.type == "git": span.set_attribute("gitops.chart.type", "git") assert app.chart.git_repo_url @@ -224,7 +226,7 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None: update_result = UpdateAppResult(app_name=app.name, slack_message="", **result) - await post_result(app=app, result=update_result, deployer=self) + await post_result(app=app, result=update_result, deployer=self, from_timestamp=from_timestamp) return update_result def calculate_app_deltas(self): diff --git a/gitops_server/workers/deployer/hooks.py b/gitops_server/workers/deployer/hooks.py index 7e14e63..f7b95ff 100644 --- a/gitops_server/workers/deployer/hooks.py +++ b/gitops_server/workers/deployer/hooks.py @@ -2,6 +2,8 @@ import logging import os +import time +from typing import Any import httpx from opentelemetry import trace @@ -17,10 +19,7 @@ logger = logging.getLogger(__name__) -async def update_issue_from_deployment_url( - app: App, - deployment_url: str, -) -> None: +async def update_issue_from_deployment_url(app: App, deployment_url: str, **kwargs: Any) -> None: async with httpx.AsyncClient() as client: headers = github.get_headers() deployment_response = await client.get(deployment_url, headers=headers) @@ -35,10 +34,12 @@ async def update_issue_from_deployment_url( try: response = await client.post(issue_url + "/labels", json={"labels": ["NODEPLOY"]}, headers=headers) response.raise_for_status() + dashboard_url = get_dashboard_url( + workspace_name=app.name, from_timestamp=kwargs.get("from_timestamp"), to_timestamp=time.time() + ) comment = ( ":poop: Failed to deploy :poop:\n Applying `NODEPLOY` label to shutdown the server" - " and prevent deploys until it has been fixed.\nCheck migration logs at" - f" https://my.papertrailapp.com/systems/{app.name}-migration/events" + f" and prevent deploys until it has been fixed.\nCheck migration logs at {dashboard_url}" ) response = await client.post(issue_url + "/comments", json={"body": comment}, headers=headers) response.raise_for_status() @@ -47,11 +48,7 @@ async def update_issue_from_deployment_url( return -async def handle_successful_deploy( - app: App, - result, - deployer, -) -> UpdateAppResult: +async def handle_successful_deploy(app: App, result, deployer, **kwargs) -> UpdateAppResult: github_deployment_url = str(app.values.get("github/deployment_url", "")) await github.update_deployment( github_deployment_url, @@ -64,7 +61,27 @@ async def handle_successful_deploy( DEFAULT_USER_GROUP = SlackGroup("devops", "", "devops", os.environ.get("DEFAULT_SLACK_USER_GROUP_ID", "S5KVCGSGP")) -async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer) -> UpdateAppResult: +def get_dashboard_url( + workspace_name: str, from_timestamp: float | None = None, to_timestamp: float | None = None +) -> str: + DASHBOARD_URL = "https://grafana.onuptick.com/d/workforce-failed-deploys/workforce-failed-deploys?from={from_timestamp}&to={to_timestamp}&var-workspace={workspace_name}" + + if from_timestamp: + from_timestamp_grafana = str(int(from_timestamp * 1000)) + else: + from_timestamp_grafana = "now-6h" + + if to_timestamp: + to_timestamp_grafana = str(int(to_timestamp * 1000)) + else: + to_timestamp_grafana = "now" + + return DASHBOARD_URL.format( + workspace_name=workspace_name, from_timestamp=from_timestamp_grafana, to_timestamp=to_timestamp_grafana + ) + + +async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer, **kwargs) -> UpdateAppResult: github_deployment_url = str(app.values.get("github/deployment_url", "")) if github_deployment_url: await github.update_deployment( @@ -72,7 +89,7 @@ async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer) -> U status=github.STATUSES.failure, description=f"Failed to deploy app. {result['output']}", ) - await update_issue_from_deployment_url(app, github_deployment_url) + await update_issue_from_deployment_url(app, github_deployment_url, **kwargs) email = deployer.author_email @@ -83,7 +100,7 @@ async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer) -> U await find_commiter_slack_user(name=deployer.author_name, email=deployer.author_email) or DEFAULT_USER_GROUP ) slack_user_msg = f" {slack_user} " if slack_user else "" - log_msg = f"" + log_msg = f"<{get_dashboard_url(workspace_name=app.name, from_timestamp=kwargs.get("from_timestamp"), to_timestamp=time.time())}|(Deployment Logs)>" result["slack_message"] = ( f"Failed to deploy app `{result['app_name']}` for cluster" f" `{settings.CLUSTER_NAME}` :rotating_light:"