From c87f4b8178e97344de072c713edfc0d0ebfc7336 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Fri, 24 Mar 2023 15:51:23 +1100 Subject: [PATCH 01/11] START: modified main.py, added gcp/azure deploy configs and modified dockerfile --- server/Dockerfile | 4 +++- server/deploy-config-azure.json | 5 +++++ server/deploy-config-gcp.json | 5 +++++ server/deploy-config.json | 1 - server/main.py | 19 ++++++++++------- server/util.py | 38 ++++++++++++++++++++++++--------- 6 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 server/deploy-config-azure.json create mode 100644 server/deploy-config-gcp.json delete mode 100644 server/deploy-config.json diff --git a/server/Dockerfile b/server/Dockerfile index 10589286..131de7a1 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -19,6 +19,8 @@ EXPOSE $PORT COPY main.py cromwell.py util.py ./ # Prepare the Hail deploy config to point to the CPG domain. -COPY deploy-config.json /deploy-config/deploy-config.json +ENV HAIL_DEPLOY_CONFIG_FILE /deploy-config/deploy-config-gcp.json +COPY deploy-config-gcp.json /deploy-config/deploy-config-gcp.json +COPY deploy-config-azure.json /deploy-config/deploy-config-azure.json CMD gunicorn --bind :$PORT --worker-class aiohttp.GunicornWebWorker main:init_func diff --git a/server/deploy-config-azure.json b/server/deploy-config-azure.json new file mode 100644 index 00000000..7e42e445 --- /dev/null +++ b/server/deploy-config-azure.json @@ -0,0 +1,5 @@ +{ + "location": "external", + "default_namespace": "default", + "domain": "azhail.populationgenomics.org.au" +} diff --git a/server/deploy-config-gcp.json b/server/deploy-config-gcp.json new file mode 100644 index 00000000..3be2a787 --- /dev/null +++ b/server/deploy-config-gcp.json @@ -0,0 +1,5 @@ +{ + "location": "external", + "default_namespace": "default", + "domain": "hail.populationgenomics.org.au" +} diff --git a/server/deploy-config.json b/server/deploy-config.json deleted file 
mode 100644 index 7fb35202..00000000 --- a/server/deploy-config.json +++ /dev/null @@ -1 +0,0 @@ -{"location": "external", "default_namespace": "default", "domain": "hail.populationgenomics.org.au"} diff --git a/server/main.py b/server/main.py index 3e59a699..a76540d4 100644 --- a/server/main.py +++ b/server/main.py @@ -1,7 +1,8 @@ """The analysis-runner server, running Hail Batch pipelines on users' behalf.""" # pylint: disable=wrong-import-order -import datetime +import os import json +import datetime import logging import traceback from shlex import quote @@ -15,6 +16,9 @@ from util import ( DRIVER_IMAGE, PUBSUB_TOPIC, + SUPPORTED_CLOUD_ENVIRONMENTS, + DEFAULT_CLOUD_ENVIRONMENT, + DEPLOY_CONFIG_PATHS, _get_hail_version, check_allowed_repos, check_dataset_and_group, @@ -41,9 +45,6 @@ routes = web.RouteTableDef() -SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp'} - - # pylint: disable=too-many-statements @routes.post('/') async def index(request): @@ -56,11 +57,14 @@ async def index(request): output_prefix = validate_output_dir(params['output']) dataset = params['dataset'] - cloud_environment = params.get('cloud_environment', 'gcp') + cloud_environment = params.get('cloud_environment', DEFAULT_CLOUD_ENVIRONMENT) if cloud_environment not in SUPPORTED_CLOUD_ENVIRONMENTS: raise web.HTTPBadRequest( reason=f'analysis-runner does not yet support the {cloud_environment} environment' ) + + # Set hail backend to the correct one based on the cloud environment + os.environ['HAIL_DEPLOY_CONFIG_FILE'] = DEPLOY_CONFIG_PATHS.get(cloud_environment) dataset_config = check_dataset_and_group( server_config=get_server_config(), @@ -229,7 +233,6 @@ async def config(request): dataset=dataset, email=email, ) - environment_config = dataset_config.get(cloud_environment) image = params.get('image') or DRIVER_IMAGE access_level = params['accessLevel'] @@ -241,8 +244,8 @@ async def config(request): # Prepare the job's configuration to return run_config = get_baseline_run_config( - 
environment=cloud_environment, - gcp_project_id=environment_config.get('projectId'), + environment='gcp', + gcp_project_id=dataset_config.get('gcp', {}).get('projectId'), dataset=dataset, access_level=access_level, output_prefix=output_prefix, diff --git a/server/util.py b/server/util.py index b1c2e34a..9c2fe13f 100644 --- a/server/util.py +++ b/server/util.py @@ -33,7 +33,18 @@ MEMBERS_CACHE_LOCATION = os.getenv('MEMBERS_CACHE_LOCATION') assert MEMBERS_CACHE_LOCATION -CONFIG_PATH_PREFIXES = {'gcp': 'gs://cpg-config'} +SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'} +DEFAULT_CLOUD_ENVIRONMENT = 'gcp' + +DEPLOY_CONFIG_PATHS = { + 'gcp': '/deploy-config/deploy-config-gcp.json', + 'azure': '/deploy-config/deploy-config-azure.json' +} + +CONFIG_PATH_PREFIXES = { + 'gcp': 'gs://cpg-config', + 'azure': 'hail-az://cpg-config' +} secret_manager = secretmanager.SecretManagerServiceClient() publisher = pubsub_v1.PublisherClient() @@ -46,7 +57,7 @@ def get_server_config() -> dict: async def _get_hail_version(environment: str) -> str: """ASYNC get hail version for the hail server in the local deploy_config""" - if not environment == 'gcp': + if environment not in SUPPORTED_CLOUD_ENVIRONMENTS: raise web.HTTPBadRequest( reason=f'Unsupported Hail Batch deploy config environment: {environment}' ) @@ -110,13 +121,22 @@ def check_dataset_and_group(server_config, environment: str, dataset, email) -> reason=f'Dataset {dataset} does not support the {environment} environment' ) - # do this to check access-members cache - gcp_project = dataset_config.get('gcp', {}).get('projectId') + if environment == 'gcp': + # do this to check access-members cache + gcp_project = dataset_config.get('gcp', {}).get('projectId') + + if not gcp_project: + raise web.HTTPBadRequest( + reason=f'The analysis-runner does not support checking group members for the {environment} environment' + ) + elif environment == 'azure': + azure_resource_group = dataset_config.get('azure', {}).get('resourceGroup') + + if not 
azure_resource_group: + raise web.HTTPBadRequest( + reason=f'The analysis-runner does not support checking group members for the {environment} environment' + ) - if not gcp_project: - raise web.HTTPBadRequest( - reason=f'The analysis-runner does not support checking group members for the {environment} environment' - ) if not is_member_in_cached_group( f'{dataset}-analysis', email, members_cache_location=MEMBERS_CACHE_LOCATION ): @@ -228,8 +248,6 @@ def get_baseline_run_config( baseline_config = { 'hail': { 'billing_project': dataset, - # TODO: how would this work for Azure - 'bucket': f'cpg-{dataset}-hail', }, 'workflow': { 'access_level': access_level, From 3cf6863a90f42e9544a4582d52cd4b3fd43bc366 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:40:45 +1100 Subject: [PATCH 02/11] ADD: --cloud command line argument for setting whether to run on gcp or azure --- analysis_runner/cli_analysisrunner.py | 13 +++++++++++++ server/main.py | 4 ++-- server/util.py | 9 +++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/analysis_runner/cli_analysisrunner.py b/analysis_runner/cli_analysisrunner.py index 93894bd0..31089849 100644 --- a/analysis_runner/cli_analysisrunner.py +++ b/analysis_runner/cli_analysisrunner.py @@ -19,6 +19,8 @@ check_if_commit_is_on_remote, ) from analysis_runner.util import ( + SUPPORTED_CLOUD_ENVIRONMENTS, + DEFAULT_CLOUD_ENVIRONMENT, add_general_args, _perform_version_check, confirm_choice, @@ -35,6 +37,15 @@ def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser: add_general_args(parser) + parser.add_argument( + '-c', + '--cloud', + required=False, + default=DEFAULT_CLOUD_ENVIRONMENT, + choices=SUPPORTED_CLOUD_ENVIRONMENTS, + help=f'Backend cloud environment to use. 
Supported options are ({", ".join(SUPPORTED_CLOUD_ENVIRONMENTS)})', + ) + parser.add_argument( '--image', help=( @@ -104,6 +115,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments commit=None, repository=None, cwd=None, + cloud=DEFAULT_CLOUD_ENVIRONMENT, image=None, cpu=None, memory=None, @@ -216,6 +228,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments 'script': _script, 'description': description, 'cwd': _cwd, + 'cloud': cloud, 'image': image, 'cpu': cpu, 'memory': memory, diff --git a/server/main.py b/server/main.py index a76540d4..390b7dbe 100644 --- a/server/main.py +++ b/server/main.py @@ -57,7 +57,7 @@ async def index(request): output_prefix = validate_output_dir(params['output']) dataset = params['dataset'] - cloud_environment = params.get('cloud_environment', DEFAULT_CLOUD_ENVIRONMENT) + cloud_environment = params.get('cloud', DEFAULT_CLOUD_ENVIRONMENT) if cloud_environment not in SUPPORTED_CLOUD_ENVIRONMENTS: raise web.HTTPBadRequest( reason=f'analysis-runner does not yet support the {cloud_environment} environment' @@ -221,7 +221,7 @@ async def config(request): output_prefix = validate_output_dir(params['output']) dataset = params['dataset'] - cloud_environment = params.get('cloud_environment', 'gcp') + cloud_environment = params.get('cloud', DEFAULT_CLOUD_ENVIRONMENT) if cloud_environment not in SUPPORTED_CLOUD_ENVIRONMENTS: raise web.HTTPBadRequest( reason=f'analysis-runner config does not yet support the {cloud_environment} environment' diff --git a/server/util.py b/server/util.py index 9c2fe13f..f4277f54 100644 --- a/server/util.py +++ b/server/util.py @@ -43,7 +43,7 @@ CONFIG_PATH_PREFIXES = { 'gcp': 'gs://cpg-config', - 'azure': 'hail-az://cpg-config' + 'azure': 'az://cpgcommon/cpg-config' } secret_manager = secretmanager.SecretManagerServiceClient() @@ -168,7 +168,12 @@ def get_analysis_runner_metadata( Get well-formed analysis-runner metadata, requiring the core listed keys with some flexibility to provide your own 
keys (as **kwargs) """ - output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}' + if environment == 'gcp': + output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}' + elif environment == 'azure': + # TODO: need a way for analysis runner to know where to save metadata + prefix = CONFIG_PATH_PREFIXES.get(environment) + output_dir = AnyPath(prefix) / return { 'timestamp': timestamp, From 22501b1ffdfb5399662176f7e758a1105da65568 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Tue, 28 Mar 2023 13:43:52 +1100 Subject: [PATCH 03/11] ADD: --cloud command line argument for setting whether to run on gcp or azure --- analysis_runner/cli_analysisrunner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis_runner/cli_analysisrunner.py b/analysis_runner/cli_analysisrunner.py index 31089849..f6f850dd 100644 --- a/analysis_runner/cli_analysisrunner.py +++ b/analysis_runner/cli_analysisrunner.py @@ -19,14 +19,14 @@ check_if_commit_is_on_remote, ) from analysis_runner.util import ( - SUPPORTED_CLOUD_ENVIRONMENTS, - DEFAULT_CLOUD_ENVIRONMENT, add_general_args, _perform_version_check, confirm_choice, logger, ) +SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'} +DEFAULT_CLOUD_ENVIRONMENT = 'gcp' def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser: """ From 133bc66b92cec2f2cf5c8b459fba46e0873539d9 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Tue, 28 Mar 2023 14:07:12 +1100 Subject: [PATCH 04/11] TESTS: add a test script for the analysis runner --- test/hail_batch_job.py | 42 ++++++++++++++++++++++++++++++++++++++++ test/hail_batch_job.toml | 19 ++++++++++++++++++ test/run_analysis.sh | 12 ++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 test/hail_batch_job.py create mode 100644 test/hail_batch_job.toml create mode 100644 test/run_analysis.sh diff --git a/test/hail_batch_job.py 
b/test/hail_batch_job.py new file mode 100644 index 00000000..ab6830f8 --- /dev/null +++ b/test/hail_batch_job.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +""" + Simple script to test whether the CPG infrastructure and permissions are + configured appropriately to permit running AIP. +""" + +import click + +from cpg_utils.config import get_config +from cpg_utils.hail_batch import remote_tmpdir +import hailtop.batch as hb + + +@click.command() +def main(): + """ + main + """ + + service_backend = hb.ServiceBackend( + billing_project=get_config()['hail']['billing_project'], + remote_tmpdir=remote_tmpdir(), + ) + batch = hb.Batch( + name='Test CPG Infra', + backend=service_backend, + cancel_after_n_failures=1, + default_timeout=6000, + default_memory='highmem', + ) + + j = batch.new_job(name='Write the file') + j.command(f'echo "Hello World." > {j.ofile}') + + k = batch.new_job(name='Read the file') + k.command(f'cat {j.ofile}') + + batch.run(wait=False) + + +if __name__ == '__main__': + main() # pylint: disable=E1120 diff --git a/test/hail_batch_job.toml b/test/hail_batch_job.toml new file mode 100644 index 00000000..2a14098d --- /dev/null +++ b/test/hail_batch_job.toml @@ -0,0 +1,19 @@ +[buckets] +web_suffix = "web" +tmp_suffix = "tmp" +analysis_suffix = "analysis" + +[workflow] +dataset = "thousand-genomes" +access_level = "test" +dataset_path = "cpgthousandgenomes/test" +output_prefix = "output" +path_scheme = "az" +image_registry_prefix = "cpgcommonimages.azurecr.io" + +[hail] +billing_project = "fewgenomes" +bucket = "az://cpgthousandgenomes/test" + +[images] +hail = "hailgenetics/hail:0.2.93" diff --git a/test/run_analysis.sh b/test/run_analysis.sh new file mode 100644 index 00000000..a63e73bb --- /dev/null +++ b/test/run_analysis.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +analysis-runner \ + --dataset thousand-genomes \ + --description 'Test script for batch on Azure' \ + --output-dir test \ + --cloud azure \ + --access-level test \ + --config 
test/hail_batch_job.toml \ + --image cpg_workflows:latest \ + test/test_cpg_infra.py \ + test From 1e16b40d9ab4d990fa9d91572010d0279462a3e9 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Tue, 28 Mar 2023 16:19:51 +1100 Subject: [PATCH 05/11] TESTS: move to examples/batch, add local option for analysis runner server --- analysis_runner/constants.py | 8 +++++++- {test => examples/batch}/hail_batch_job.py | 0 {test => examples/batch}/hail_batch_job.toml | 0 {test => examples/batch}/run_analysis.sh | 4 ++-- server/util.py | 2 +- 5 files changed, 10 insertions(+), 4 deletions(-) rename {test => examples/batch}/hail_batch_job.py (100%) rename {test => examples/batch}/hail_batch_job.toml (100%) rename {test => examples/batch}/run_analysis.sh (72%) diff --git a/analysis_runner/constants.py b/analysis_runner/constants.py index eb29acdd..ea07e5fc 100644 --- a/analysis_runner/constants.py +++ b/analysis_runner/constants.py @@ -1,5 +1,7 @@ """Constants for analysis-runner""" +import os + SERVER_ENDPOINT = 'https://server-a2pko7ameq-ts.a.run.app' SERVER_TEST_ENDPOINT = 'https://server-test-a2pko7ameq-ts.a.run.app' ANALYSIS_RUNNER_PROJECT_ID = 'analysis-runner' @@ -11,13 +13,17 @@ 'gcloud -q auth activate-service-account --key-file=/gsa-key/key.json' ) +USE_LOCAL_SERVER = os.getenv('ANALYSIS_RUNNER_LOCAL', False) + def get_server_endpoint(is_test: bool = False): """ Get the server endpoint {production / test} Do it in a function so it's easy to fix if the logic changes """ - if is_test: + if USE_LOCAL_SERVER: + return 'http://localhost:8080' + elif is_test: return SERVER_TEST_ENDPOINT return SERVER_ENDPOINT diff --git a/test/hail_batch_job.py b/examples/batch/hail_batch_job.py similarity index 100% rename from test/hail_batch_job.py rename to examples/batch/hail_batch_job.py diff --git a/test/hail_batch_job.toml b/examples/batch/hail_batch_job.toml similarity index 100% rename from test/hail_batch_job.toml rename to 
examples/batch/hail_batch_job.toml diff --git a/test/run_analysis.sh b/examples/batch/run_analysis.sh similarity index 72% rename from test/run_analysis.sh rename to examples/batch/run_analysis.sh index a63e73bb..1030c9ff 100644 --- a/test/run_analysis.sh +++ b/examples/batch/run_analysis.sh @@ -6,7 +6,7 @@ analysis-runner \ --output-dir test \ --cloud azure \ --access-level test \ - --config test/hail_batch_job.toml \ + --config examples/batch/hail_batch_job.toml \ --image cpg_workflows:latest \ - test/test_cpg_infra.py \ + examples/batch/test_cpg_infra.py \ test diff --git a/server/util.py b/server/util.py index f4277f54..cb406d0e 100644 --- a/server/util.py +++ b/server/util.py @@ -173,7 +173,7 @@ def get_analysis_runner_metadata( elif environment == 'azure': # TODO: need a way for analysis runner to know where to save metadata prefix = CONFIG_PATH_PREFIXES.get(environment) - output_dir = AnyPath(prefix) / + # output_dir = AnyPath(prefix) / return { 'timestamp': timestamp, From adf9663ae13b325e3dfd947e917012d85bbaee7f Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Tue, 28 Mar 2023 17:00:48 +1100 Subject: [PATCH 06/11] TESTS: bug fixes for testing --- analysis_runner/cli.py | 2 +- analysis_runner/constants.py | 3 ++- examples/batch/hail_batch_job.py | 1 + server/util.py | 7 +++++-- 4 files changed, 9 insertions(+), 4 deletions(-) mode change 100644 => 100755 examples/batch/hail_batch_job.py diff --git a/analysis_runner/cli.py b/analysis_runner/cli.py index 404fba53..18bcc19a 100755 --- a/analysis_runner/cli.py +++ b/analysis_runner/cli.py @@ -78,4 +78,4 @@ def main_from_args(args=None): if __name__ == '__main__': - main_from_args() + main_from_args(args=sys.argv[1:]) diff --git a/analysis_runner/constants.py b/analysis_runner/constants.py index ea07e5fc..d083fe6d 100644 --- a/analysis_runner/constants.py +++ b/analysis_runner/constants.py @@ -1,6 +1,7 @@ """Constants for analysis-runner""" import os +import distutils 
SERVER_ENDPOINT = 'https://server-a2pko7ameq-ts.a.run.app' SERVER_TEST_ENDPOINT = 'https://server-test-a2pko7ameq-ts.a.run.app' @@ -13,7 +14,7 @@ 'gcloud -q auth activate-service-account --key-file=/gsa-key/key.json' ) -USE_LOCAL_SERVER = os.getenv('ANALYSIS_RUNNER_LOCAL', False) +USE_LOCAL_SERVER = distutils.util.strtobool(os.getenv('ANALYSIS_RUNNER_LOCAL', 'False')) def get_server_endpoint(is_test: bool = False): diff --git a/examples/batch/hail_batch_job.py b/examples/batch/hail_batch_job.py old mode 100644 new mode 100755 index ab6830f8..a62b8842 --- a/examples/batch/hail_batch_job.py +++ b/examples/batch/hail_batch_job.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + """ Simple script to test whether the CPG infrastructure and permissions are configured appropriately to permit running AIP. diff --git a/server/util.py b/server/util.py index cb406d0e..d0fcd3a4 100644 --- a/server/util.py +++ b/server/util.py @@ -6,6 +6,7 @@ import json import uuid import toml +import distutils from aiohttp import web, ClientSession from cloudpathlib import AnyPath @@ -36,9 +37,11 @@ SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'} DEFAULT_CLOUD_ENVIRONMENT = 'gcp' +USE_LOCAL_SERVER = distutils.util.strtobool(os.getenv('ANALYSIS_RUNNER_LOCAL', 'False')) +PREFIX = os.path.dirname(os.path.abspath(__file__)) if USE_LOCAL_SERVER else '/deploy-config/' DEPLOY_CONFIG_PATHS = { - 'gcp': '/deploy-config/deploy-config-gcp.json', - 'azure': '/deploy-config/deploy-config-azure.json' + 'gcp': f'{PREFIX}deploy-config-gcp.json', + 'azure': f'{PREFIX}deploy-config-azure.json' } CONFIG_PATH_PREFIXES = { From d5ebf93d554ce2d2ea0fd146ec780b9b93350224 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Thu, 30 Mar 2023 13:48:17 +1100 Subject: [PATCH 07/11] IN PROGRESS: azure bucket access to analysis runner --- .gitignore | 1 + analysis_runner/cli_analysisrunner.py | 4 ++-- server/main.py | 4 ++-- server/util.py | 31 +++++++++++++++++---------- 4 files changed, 25 
insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index b2867c56..6a52dd99 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,4 @@ dmypy.json .idea/ .DS_Store .vscode/ +.*.json diff --git a/analysis_runner/cli_analysisrunner.py b/analysis_runner/cli_analysisrunner.py index f6f850dd..314dad24 100644 --- a/analysis_runner/cli_analysisrunner.py +++ b/analysis_runner/cli_analysisrunner.py @@ -9,7 +9,7 @@ import requests from cpg_utils.config import read_configs -from cpg_utils.cloud import get_google_identity_token +from cpg_utils.cloud import get_google_identity_token, get_azure_identity_token from analysis_runner.constants import get_server_endpoint from analysis_runner.git import ( get_git_default_remote, @@ -237,7 +237,7 @@ def run_analysis_runner( # pylint: disable=too-many-arguments 'config': _config, }, headers={'Authorization': f'Bearer {_token}'}, - timeout=60, + # timeout=60, ) try: response.raise_for_status() diff --git a/server/main.py b/server/main.py index 390b7dbe..23553ae1 100644 --- a/server/main.py +++ b/server/main.py @@ -118,8 +118,8 @@ async def index(request): # Prepare the job's configuration and write it to a blob. 
run_config = get_baseline_run_config( - environment=cloud_environment, - gcp_project_id=environment_config.get('projectId'), + environment='gcp', + gcp_project_id=dataset_config.get('gcp', {}).get('projectId'), dataset=dataset, access_level=access_level, output_prefix=output_prefix, diff --git a/server/util.py b/server/util.py index d0fcd3a4..9d7048f8 100644 --- a/server/util.py +++ b/server/util.py @@ -9,8 +9,9 @@ import distutils from aiohttp import web, ClientSession -from cloudpathlib import AnyPath +from cloudpathlib import AzureBlobClient, AnyPath from hailtop.config import get_deploy_config +from azure.identity import DefaultAzureCredential, EnvironmentCredential from google.cloud import secretmanager, pubsub_v1 from cpg_utils.config import update_dict from cpg_utils.cloud import ( @@ -40,15 +41,23 @@ USE_LOCAL_SERVER = distutils.util.strtobool(os.getenv('ANALYSIS_RUNNER_LOCAL', 'False')) PREFIX = os.path.dirname(os.path.abspath(__file__)) if USE_LOCAL_SERVER else '/deploy-config/' DEPLOY_CONFIG_PATHS = { - 'gcp': f'{PREFIX}deploy-config-gcp.json', - 'azure': f'{PREFIX}deploy-config-azure.json' + 'gcp': os.path.join(PREFIX, 'deploy-config-gcp.json'), + 'azure': os.path.join(PREFIX, 'deploy-config-azure.json') } +AZURE_STORAGE_ACCOUNT = 'cpgcommon' CONFIG_PATH_PREFIXES = { 'gcp': 'gs://cpg-config', - 'azure': 'az://cpgcommon/cpg-config' + 'azure': f'az://cpg-config' } +# Set Azure AnyPath client +client = AzureBlobClient( + account_url=f'https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/', + credential=EnvironmentCredential() +) +client.set_as_default_client() + secret_manager = secretmanager.SecretManagerServiceClient() publisher = pubsub_v1.PublisherClient() @@ -133,9 +142,7 @@ def check_dataset_and_group(server_config, environment: str, dataset, email) -> reason=f'The analysis-runner does not support checking group members for the {environment} environment' ) elif environment == 'azure': - azure_resource_group = dataset_config.get('azure', 
{}).get('resourceGroup') - - if not azure_resource_group: + if not environment in dataset_config: raise web.HTTPBadRequest( reason=f'The analysis-runner does not support checking group members for the {environment} environment' ) @@ -171,12 +178,14 @@ def get_analysis_runner_metadata( Get well-formed analysis-runner metadata, requiring the core listed keys with some flexibility to provide your own keys (as **kwargs) """ + output_dir = None if environment == 'gcp': output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}' elif environment == 'azure': - # TODO: need a way for analysis runner to know where to save metadata - prefix = CONFIG_PATH_PREFIXES.get(environment) - # output_dir = AnyPath(prefix) / + config = toml.load(config_path) + key = '.test' if cpg_namespace(access_level) == 'test' else '' + output_path = config.get(f'storage.{dataset}{key}', {}).get('default') + output_dir = f'{output_path}/{output_prefix}' return { 'timestamp': timestamp, @@ -198,7 +207,7 @@ def get_analysis_runner_metadata( def run_batch_job_and_print_url(batch, wait, environment): """Call batch.run(), return the URL, and wait for job to finish if wait=True""" - if not environment == 'gcp': + if environment not in SUPPORTED_CLOUD_ENVIRONMENTS: raise web.HTTPBadRequest( reason=f'Unsupported Hail Batch deploy config environment: {environment}' ) From 350d1d1aa56a5fe4de595d30933866c8ef9f20cd Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Thu, 30 Mar 2023 14:59:49 +1100 Subject: [PATCH 08/11] WORKFLOW: add push of driver image to azure --- .github/workflows/deploy_server.yaml | 32 ++++++++++++++++++++++------ server/cromwell.py | 10 ++++----- server/main.py | 6 +++++- server/util.py | 11 +--------- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/.github/workflows/deploy_server.yaml b/.github/workflows/deploy_server.yaml index db498218..2b275aad 100644 --- a/.github/workflows/deploy_server.yaml +++ 
b/.github/workflows/deploy_server.yaml @@ -15,8 +15,10 @@ jobs: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain CLOUDSDK_CORE_DISABLE_PROMPTS: 1 - DRIVER_IMAGE: australia-southeast1-docker.pkg.dev/analysis-runner/images/driver - SERVER_IMAGE: australia-southeast1-docker.pkg.dev/analysis-runner/images/server + AZURE_CONTAINER_REGISTRY: cpgcommonimages.azurecr.io + GCP_CONTAINER_REGISTRY: australia-southeast1-docker.pkg.dev + DRIVER_IMAGE: analysis-runner/images/driver + SERVER_IMAGE: australia-southeast1-docker.pkg.dev/analysis-runner/images/server steps: - name: "checkout analysis-runner repo" @@ -48,21 +50,37 @@ jobs: run: | gcloud auth configure-docker marketplace.gcr.io,australia-southeast1-docker.pkg.dev + - name: "azure setup" + uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_CREDENTIALS }} + + - name: "azure docker auth" + uses: azure/docker-login@v1 + with: + login-server: ${{ env.AZURE_CONTAINER_REGISTRY }} + username: ${{ secrets.AZURE_REGISTRY_USERNAME }} + password: ${{ secrets.AZURE_REGISTRY_PASSWORD }} + - name: "build driver image" run: | docker build -f driver/Dockerfile.hail --build-arg HAIL_SHA=$HAIL_SHA --tag $DRIVER_IMAGE:$IMAGE_TAG driver - - name: "push driver image" + - name: "tag and push gcp image [driver]" + run: | + docker image tag $DRIVER_IMAGE:$IMAGE_TAG $GCP_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest && + docker push $GCP_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest + + - name: "tag and push azure image [driver]" run: | - docker push $DRIVER_IMAGE:$IMAGE_TAG - docker tag $DRIVER_IMAGE:$IMAGE_TAG $DRIVER_IMAGE:latest - docker push $DRIVER_IMAGE:latest + docker image tag $DRIVER_IMAGE:$IMAGE_TAG $AZURE_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest && + docker push $AZURE_CONTAINER_REGISTRY/$DRIVER_IMAGE:latest - name: "build server image" run: | docker build --build-arg DRIVER_IMAGE=$DRIVER_IMAGE:$IMAGE_TAG --tag $SERVER_IMAGE:$IMAGE_TAG server - - name: "push server image" + - name: "push server image to gcp" run: | docker push $SERVER_IMAGE:$IMAGE_TAG docker tag
$SERVER_IMAGE:$IMAGE_TAG $SERVER_IMAGE:latest diff --git a/server/cromwell.py b/server/cromwell.py index 3c3449ce..c2f238c4 100644 --- a/server/cromwell.py +++ b/server/cromwell.py @@ -3,6 +3,7 @@ Exports 'add_cromwell_routes', to add the following route to a flask API: POST /cromwell: Posts a workflow to a cromwell_url """ +import os import json from datetime import datetime @@ -100,11 +101,6 @@ async def cromwell(request): # pylint: disable=too-many-locals input_jsons = params.get('input_json_paths') or [] input_dict = params.get('inputs_dict') - if access_level == 'test': - workflow_output_dir = f'gs://cpg-{dataset}-test/{output_dir}' - else: - workflow_output_dir = f'gs://cpg-{dataset}-main/{output_dir}' - timestamp = datetime.now().astimezone().isoformat() # Prepare the job's configuration and write it to a blob. @@ -121,6 +117,10 @@ async def cromwell(request): # pylint: disable=too-many-locals config_path = write_config(config, cloud_environment) # This metadata dictionary gets stored at the output_dir location. 
+ workflow_output_dir = os.path.join( + config.get('storage', {}).get('default', {}).get('default'), + output_dir + ) metadata = get_analysis_runner_metadata( timestamp=timestamp, dataset=dataset, diff --git a/server/main.py b/server/main.py index 23553ae1..45225a32 100644 --- a/server/main.py +++ b/server/main.py @@ -129,6 +129,10 @@ async def index(request): update_dict(run_config, user_config) config_path = write_config(run_config, environment=cloud_environment) + output_dir = os.path.join( + run_config.get('storage', {}).get('default', {}).get('default'), + output_prefix + ) metadata = get_analysis_runner_metadata( timestamp=timestamp, dataset=dataset, @@ -138,12 +142,12 @@ async def index(request): commit=commit, script=' '.join(script), description=params['description'], - output_prefix=output_prefix, hailVersion=hail_version, driver_image=image, config_path=config_path, cwd=cwd, environment=cloud_environment, + output_dir=output_dir ) user_name = email.split('@')[0] diff --git a/server/util.py b/server/util.py index 9d7048f8..9fb02750 100644 --- a/server/util.py +++ b/server/util.py @@ -167,26 +167,17 @@ def get_analysis_runner_metadata( commit, script, description, - output_prefix, driver_image, config_path, cwd, environment, + output_dir, **kwargs, ): """ Get well-formed analysis-runner metadata, requiring the core listed keys with some flexibility to provide your own keys (as **kwargs) """ - output_dir = None - if environment == 'gcp': - output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}' - elif environment == 'azure': - config = toml.load(config_path) - key = '.test' if cpg_namespace(access_level) == 'test' else '' - output_path = config.get(f'storage.{dataset}{key}', {}).get('default') - output_dir = f'{output_path}/{output_prefix}' - return { 'timestamp': timestamp, 'dataset': dataset, From 9af914ec812a138ca9b22ffa21cfbac5e684d85a Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: 
Thu, 30 Mar 2023 16:56:06 +1100 Subject: [PATCH 09/11] FIX: write_config output hail-az:// path --- server/util.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/util.py b/server/util.py index 9fb02750..1dfb5787 100644 --- a/server/util.py +++ b/server/util.py @@ -226,13 +226,20 @@ def validate_image(container: str, is_test: bool): def write_config(config: dict, environment: str) -> str: """Writes the given config dictionary to a blob and returns its unique path.""" + prefix = CONFIG_PATH_PREFIXES.get(environment) if not prefix: raise web.HTTPBadRequest(reason=f'Bad environment for config: {environment}') + # Uses the default AzureBlobClient defined at the top of utils + # to connect to AZURE_STORAGE_ACCOUNT where it will always write out to config_path = AnyPath(prefix) / (str(uuid.uuid4()) + '.toml') with config_path.open('w') as f: toml.dump(config, f) + + if environment == 'azure': + return os.path.join(f'hail-az://{AZURE_STORAGE_ACCOUNT}', str(config_path).removeprefix('az://')) + return str(config_path) From ac4effb889c3e1d63bae98b9a4a3fc006a3bce6a Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Thu, 30 Mar 2023 17:15:41 +1100 Subject: [PATCH 10/11] DEBUGGING --- server/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/main.py b/server/main.py index 45225a32..ff4de9d3 100644 --- a/server/main.py +++ b/server/main.py @@ -161,6 +161,7 @@ async def index(request): batch = hb.Batch(backend=backend, name=batch_name, **extra_batch_params) job = batch.new_job(name='driver') + job.command('cat /gsa-key/key.json') job = prepare_git_job(job=job, repo_name=repo, commit=commit, is_test=is_test) job.image(image) if cpu: From 9d78ebc4a37d9dca8f690afffd38e9c3f8121cc8 Mon Sep 17 00:00:00 2001 From: Sabrina <9669990+violetbrina@users.noreply.github.com> Date: Fri, 31 Mar 2023 12:20:03 +1100 Subject: [PATCH 11/11] DEBUGGING --- server/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/server/util.py b/server/util.py index 1dfb5787..08b4afc4 100644 --- a/server/util.py +++ b/server/util.py @@ -48,7 +48,7 @@ AZURE_STORAGE_ACCOUNT = 'cpgcommon' CONFIG_PATH_PREFIXES = { 'gcp': 'gs://cpg-config', - 'azure': f'az://cpg-config' + 'azure': 'az://cpg-config' } # Set Azure AnyPath client