From 90944a4e21def4700fba1ccc6b7caa452a4944ca Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 06:08:54 -0700 Subject: [PATCH 01/26] test the case-sensitivity of the environment input --- .github/workflows/platform_cd_workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 47f733be..d7eb81ec 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -82,6 +82,7 @@ jobs: deploy-online: name: Deploy_Online runs-on: ubuntu-latest + environment: pr permissions: id-token: write contents: read @@ -283,6 +284,7 @@ jobs: deploy-batch: name: Deploy_Batch runs-on: ubuntu-latest + environment: PR permissions: id-token: write contents: read From 8ee4dca1b40196d6485f864a1a388d2b65080612 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 06:18:46 -0700 Subject: [PATCH 02/26] Add back workflow_call block --- .github/workflows/platform_cd_workflow.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index d7eb81ec..e1d3dc33 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -11,6 +11,17 @@ on: model_type: type: string required: true + workflow_call: + inputs: + exec_environment: + type: string + description: "Execution Environment" + required: true + default: "dev" + model_type: + type: string + description: "type of model to execute" + required: true jobs: execute-training-job: From 13c896d6125ad16919bec9ec75e00f78ccdd357c Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 07:28:18 -0700 Subject: [PATCH 03/26] Fix platform cd since permissions and environment mapping had been removed --- .github/workflows/platform_cd_workflow.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index e1d3dc33..100236e8 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -23,6 +23,24 @@ on: description: "type of model to execute" required: true +permissions: + id-token: write + contents: read + +env: + SUBSCRIPTION_ID: ${{vars.SUBSCRIPTION_ID}} + RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} + WORKSPACE_NAME: ${{ vars.WORKSPACE_NAME }} + STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCT_NAME }} + ARM_CLIENT_ID: ${{vars.ARM_CLIENT_ID}} + ARM_TENANT_ID: ${{vars.ARM_TENANT_ID}} + BUILD_BUILDID: ${{ github.run_id }} + BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} + + + + + jobs: execute-training-job: name: Execute training job From 66feb42de3492291e2ebbde8c1781248e503aa06 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 07:53:06 -0700 Subject: [PATCH 04/26] allow setting of enable_storage_public_access in cd dev deployment --- .github/workflows/platform_cd_workflow.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 100236e8..309b20d4 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -7,7 +7,7 @@ on: type: string enable_storage_public_access: type: boolean - default: false + default: true model_type: type: string required: true @@ -18,6 +18,9 @@ on: description: "Execution Environment" required: true default: "dev" + enable_storage_public_access: + type: boolean + default: false model_type: type: string description: "type of model to execute" From ae6dbe3a694cf4d919ac02d4db5622b91c2694b5 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 07:59:28 -0700 Subject: [PATCH 05/26] add ability to pass public access flag from model to platform on workflow call --- 
.github/workflows/london_taxi_cd_pipeline.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index 0d8c6ffd..c274e5cd 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -8,6 +8,9 @@ on: model_type: type: string default: "london_taxi" + enable_storage_public_access: + type: boolean + default: true # ← ADD THIS (true to enable by default) push: branches: - main From 22d01ab4082668865a4e181efb35e5f528bfecde Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 08:05:29 -0700 Subject: [PATCH 06/26] Pass the enable_storage_public_access to downstream pipelines --- .github/workflows/london_taxi_cd_pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index c274e5cd..a819fec4 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -41,3 +41,4 @@ jobs: with: exec_environment: ${{ inputs.exec_environment || 'dev' }} model_type: ${{ inputs.model_type || 'london_taxi' }} + enable_storage_public_access: ${{ inputs.enable_storage_public_access || false }} From f37a0d5262c8467e6a174fd9b26ecc43aa7cbb25 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 08:18:22 -0700 Subject: [PATCH 07/26] Make exec_environment passed as a variable --- .github/workflows/london_taxi_cd_pipeline.yml | 3 +++ .github/workflows/platform_cd_workflow.yml | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index a819fec4..4e01576c 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -32,6 +32,9 @@ on: description: "The type of model to run the workflow for" required: true 
default: "london_taxi" + enable_storage_public_access: + type: boolean + default: true permissions: id-token: write contents: read diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 309b20d4..4777d4c2 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -114,7 +114,7 @@ jobs: deploy-online: name: Deploy_Online runs-on: ubuntu-latest - environment: pr + environment: ${{ inputs.exec_environment }} permissions: id-token: write contents: read @@ -316,7 +316,7 @@ jobs: deploy-batch: name: Deploy_Batch runs-on: ubuntu-latest - environment: PR + environment: ${{ inputs.exec_environment }} permissions: id-token: write contents: read @@ -395,8 +395,7 @@ jobs: id-token: write contents: read needs: [execute-training-job] - environment: - name: dev + environment: ${{ inputs.exec_environment }} steps: - name: Checkout uses: actions/checkout@v4 From 04bbe74b5dede85cd93bb90a68d8e8fd5a806662 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sat, 3 Jan 2026 09:35:36 -0700 Subject: [PATCH 08/26] Fix storage validation step --- .github/actions/configure_azureml_agent/action.yml | 9 ++++++++- .github/workflows/platform_cd_workflow.yml | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/actions/configure_azureml_agent/action.yml b/.github/actions/configure_azureml_agent/action.yml index 1790df53..194f726d 100644 --- a/.github/actions/configure_azureml_agent/action.yml +++ b/.github/actions/configure_azureml_agent/action.yml @@ -70,4 +70,11 @@ runs: shell: bash run: | python -m pip install --upgrade pip - python -m pip install --upgrade -r .github/requirements/execute_job_requirements.txt \ No newline at end of file + python -m pip install --upgrade -r .github/requirements/execute_job_requirements.txt + + - name: Pre-install Azure ML CLI Extension + shell: bash + run: | + echo "Pre-installing Azure ML CLI extension to avoid warnings..." 
+ az extension add --name ml --yes --only-show-errors 2>/dev/null || true + echo "Azure ML CLI extension ready." \ No newline at end of file diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 4777d4c2..8320bf92 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -175,7 +175,8 @@ jobs: --name "$PUBLISHED_MODEL_NAME" \ --workspace-name ${{ env.WORKSPACE_NAME }} \ --resource-group ${{ env.RESOURCE_GROUP_NAME }} \ - --subscription ${{ env.SUBSCRIPTION_ID }} -o json) + --subscription ${{ env.SUBSCRIPTION_ID }} \ + --only-show-errors -o json 2>&1 | grep -v '^WARNING' | grep -v '^Class') if [ -z "$MODEL_LIST_JSON" ] || [ "$MODEL_LIST_JSON" = "[]" ]; then echo "❌ ERROR: No registered model found named '$PUBLISHED_MODEL_NAME'" From e2cf107d446d773546df6d43be2d7ad7069574cb Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 06:11:03 -0700 Subject: [PATCH 09/26] Fix Validate step json-parsing issues. --- .github/workflows/platform_cd_workflow.yml | 108 +++++++++++---------- 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 8320bf92..4731d23f 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -168,59 +168,69 @@ jobs: print(generate_model_name("${{ inputs.model_type }}")) PY ) - echo "Published model name: $PUBLISHED_MODEL_NAME" - - echo "Listing registered models to find latest version..." 
- MODEL_LIST_JSON=$(az ml model list \ - --name "$PUBLISHED_MODEL_NAME" \ - --workspace-name ${{ env.WORKSPACE_NAME }} \ - --resource-group ${{ env.RESOURCE_GROUP_NAME }} \ - --subscription ${{ env.SUBSCRIPTION_ID }} \ - --only-show-errors -o json 2>&1 | grep -v '^WARNING' | grep -v '^Class') - - if [ -z "$MODEL_LIST_JSON" ] || [ "$MODEL_LIST_JSON" = "[]" ]; then - echo "❌ ERROR: No registered model found named '$PUBLISHED_MODEL_NAME'" - exit 1 - fi - - LATEST_VERSION=$(echo "$MODEL_LIST_JSON" | python - < /tmp/model_list.json 2>&1 || true + + # Filter out any non-JSON lines (warnings, etc) + MODEL_LIST_JSON=$(grep -E '^\[|^ |^\]' /tmp/model_list.json || echo "[]") + + if [ -z "$MODEL_LIST_JSON" ] || [ "$MODEL_LIST_JSON" = "[]" ]; then + echo "❌ ERROR: No registered model found named '$PUBLISHED_MODEL_NAME'" + echo "Debug: Raw output from model list:" + cat /tmp/model_list.json || true + exit 1 + fi + + LATEST_VERSION=$(echo "$MODEL_LIST_JSON" | python - < Date: Sun, 4 Jan 2026 06:23:21 -0700 Subject: [PATCH 10/26] Add a local storage validation for online endpoints --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cdcba079..13b0df63 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,4 @@ mlops/*/environment/mamba* interrogate_deployment.ps1 .github/workflows/deploy_online_only.yml vars.env +mlops/common/validate_storage_rbac.py From 9341a0e75bedb02236e50d93adb9a50e6fbdb983 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 07:27:08 -0700 Subject: [PATCH 11/26] Comment out the Validation step - it is challenging to get working --- .github/workflows/london_taxi_cd_pipeline.yml | 8 +- .github/workflows/platform_cd_workflow.yml | 186 +++++++++--------- 2 files changed, 103 insertions(+), 91 deletions(-) diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index 4e01576c..db04822f 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ 
b/.github/workflows/london_taxi_cd_pipeline.yml @@ -34,6 +34,8 @@ on: default: "london_taxi" enable_storage_public_access: type: boolean + description: "Temporarily enable storage public access for training" + required: false default: true permissions: id-token: write @@ -44,4 +46,8 @@ jobs: with: exec_environment: ${{ inputs.exec_environment || 'dev' }} model_type: ${{ inputs.model_type || 'london_taxi' }} - enable_storage_public_access: ${{ inputs.enable_storage_public_access || false }} + # Converts the input parameter 'enable_storage_public_access' to a boolean value. + # Intended behavior: if the input is not explicitly set to false, it defaults to true. + # The goal is for storage public access to be enabled by default unless explicitly disabled. + # NOTE(review): '!= false' is a single inequality comparison, not a double negation. GitHub Actions expressions coerce mismatched types to numbers (null and '' become 0, as does false), so an unset input (e.g. on a push trigger) may evaluate to false here; confirm. + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index 4731d23f..a0ec1b4e 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -141,96 +141,102 @@ jobs: echo "No common directory found for ${{ inputs.model_type }}, skipping copy" fi - - name: Validate Storage Configuration (RBAC smoke tests) - run: | - echo "=== Validating Storage Configuration and AD-auth model download ===" - - # Print storage configuration for diagnostics - az storage account show \ - --name ${{ env.STORAGE_ACCOUNT_NAME }} \ - --resource-group ${{ env.RESOURCE_GROUP_NAME }} \ - --subscription ${{ env.SUBSCRIPTION_ID }} \ - --query '{name:name, defaultAction:networkRuleSet.defaultAction, allowSharedKeyAccess:allowSharedKeyAccess, publicNetworkAccess:publicNetworkAccess}' -o json - - echo "\nTesting blob access with workflow identity (auth-mode login)..." - if ! 
az storage container list \ - --account-name ${{ env.STORAGE_ACCOUNT_NAME }} \ - --auth-mode login \ - --subscription ${{ env.SUBSCRIPTION_ID }} \ - --output table; then - echo "❌ ERROR: Workflow identity cannot access storage - this indicates RBAC or network issues" - exit 1 - fi - - echo "\nDeriving published model name for smoke download test..." - PUBLISHED_MODEL_NAME=$(python - < /tmp/model_list.json 2>&1 || true - - # Filter out any non-JSON lines (warnings, etc) - MODEL_LIST_JSON=$(grep -E '^\[|^ |^\]' /tmp/model_list.json || echo "[]") - - if [ -z "$MODEL_LIST_JSON" ] || [ "$MODEL_LIST_JSON" = "[]" ]; then - echo "❌ ERROR: No registered model found named '$PUBLISHED_MODEL_NAME'" - echo "Debug: Raw output from model list:" - cat /tmp/model_list.json || true - exit 1 - fi - - LATEST_VERSION=$(echo "$MODEL_LIST_JSON" | python - </dev/null | python - < Date: Sun, 4 Jan 2026 08:19:26 -0700 Subject: [PATCH 12/26] Test docker-taxi-cd --- .github/workflows/docker_taxi_cd_pipeline.yml | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker_taxi_cd_pipeline.yml b/.github/workflows/docker_taxi_cd_pipeline.yml index ee2b0671..67653901 100644 --- a/.github/workflows/docker_taxi_cd_pipeline.yml +++ b/.github/workflows/docker_taxi_cd_pipeline.yml @@ -1,6 +1,17 @@ name: Custom Object Detection CD Workflow on: + workflow_dispatch: + inputs: + exec_environment: + type: string + default: "dev" + model_type: + type: string + default: "docker_taxi" + enable_storage_public_access: + type: boolean + default: true push: branches: - main @@ -22,12 +33,19 @@ on: description: "The type of model to run the workflow for" required: true default: "docker_taxi" + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true permissions: id-token: write - contents: read + contents: read jobs: run-cd-workflow: - uses: 
./.github/workflows/platform_cd_workflow.yml + uses: microsoft/dstoolkit-mlops-v2/.github/workflows/platform_cd_workflow.yml@main with: exec_environment: ${{ inputs.exec_environment || 'dev' }} model_type: ${{ inputs.model_type || 'docker_taxi' }} + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + secrets: inherit From 31a2f9fc7a5450090e82e344097d28c731a83d7f Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 08:26:08 -0700 Subject: [PATCH 13/26] Fix path to generic platform_cd --- .github/workflows/docker_taxi_cd_pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker_taxi_cd_pipeline.yml b/.github/workflows/docker_taxi_cd_pipeline.yml index 67653901..3463e42d 100644 --- a/.github/workflows/docker_taxi_cd_pipeline.yml +++ b/.github/workflows/docker_taxi_cd_pipeline.yml @@ -43,7 +43,7 @@ permissions: contents: read jobs: run-cd-workflow: - uses: microsoft/dstoolkit-mlops-v2/.github/workflows/platform_cd_workflow.yml@main + uses: ./.github/workflows/platform_cd_workflow.yml with: exec_environment: ${{ inputs.exec_environment || 'dev' }} model_type: ${{ inputs.model_type || 'docker_taxi' }} From 08960fd872590469d6f0f5f862400ea4dcd1c598 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 08:41:55 -0700 Subject: [PATCH 14/26] Fix linting errors --- mlops/common/config_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlops/common/config_utils.py b/mlops/common/config_utils.py index e9170349..6108ea2c 100644 --- a/mlops/common/config_utils.py +++ b/mlops/common/config_utils.py @@ -132,7 +132,8 @@ def get_pipeline_config(self, pipeline_name: str) -> Dict: available = ', '.join(sorted(self.pipeline_configs.keys())) raise KeyError( - f"Pipeline config '{pipelineconfig_name}' not found in {self.config_path}. 
``pipeline_configs`` keys: {available}" + f"Pipeline config '{pipelineconfig_name}' not found in {self.config_path}. " + f"``pipeline_configs`` keys: {available}" ) def get_deployment_config(self, deployment_name: str) -> Dict: @@ -143,7 +144,8 @@ def get_deployment_config(self, deployment_name: str) -> Dict: available = ', '.join(sorted(self.deployment_configs.keys())) raise KeyError( - f"Deployment config '{deploymentconfig_name}' not found in {self.config_path}. ``deployment_configs`` keys: {available}" + f"Deployment config '{deploymentconfig_name}' not found in {self.config_path}. " + f"``deployment_configs`` keys: {available}" ) From 966c563fa28f3b310e0cb5c4fe5f91ab340918c9 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 08:47:56 -0700 Subject: [PATCH 15/26] Enable public storage access --- .github/workflows/docker_taxi_ci_pipeline.yml | 14 +++++++++++++- .github/workflows/london_taxi_ci_pipeline.yml | 9 +++++++++ .github/workflows/sequence_model_ci_pipeline.yml | 6 ++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker_taxi_ci_pipeline.yml b/.github/workflows/docker_taxi_ci_pipeline.yml index 30cb388a..33a40d18 100644 --- a/.github/workflows/docker_taxi_ci_pipeline.yml +++ b/.github/workflows/docker_taxi_ci_pipeline.yml @@ -28,6 +28,11 @@ on: description: "Is Docker used for build validation?" required: true default: true + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true workflow_call: inputs: exec_environment: @@ -45,9 +50,14 @@ on: description: "Is Docker used for build validation?" 
required: true default: true + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true permissions: id-token: write - contents: read + contents: read jobs: run-ci-workflow: uses: ./.github/workflows/platform_ci_workflow.yml @@ -55,3 +65,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'docker_taxi' }} is_docker: ${{ github.event_name == 'pull_request' && true || inputs.is_docker }} + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + secrets: inherit diff --git a/.github/workflows/london_taxi_ci_pipeline.yml b/.github/workflows/london_taxi_ci_pipeline.yml index 3b76972a..a228f44c 100644 --- a/.github/workflows/london_taxi_ci_pipeline.yml +++ b/.github/workflows/london_taxi_ci_pipeline.yml @@ -9,6 +9,9 @@ on: model_type: type: string default: "london_taxi" + enable_storage_public_access: + type: boolean + default: true pull_request: branches: - main @@ -31,6 +34,11 @@ on: description: "The type of model to run the workflow for" required: true default: "london_taxi" + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true permissions: id-token: write contents: read @@ -41,4 +49,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'london_taxi' }} is_docker: false + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} secrets: inherit \ No newline at end of file diff --git a/.github/workflows/sequence_model_ci_pipeline.yml b/.github/workflows/sequence_model_ci_pipeline.yml index fe9cc38b..4ac02c65 100644 --- a/.github/workflows/sequence_model_ci_pipeline.yml +++ b/.github/workflows/sequence_model_ci_pipeline.yml @@ -23,6 +23,11 @@ on: description: "The type of model to run the workflow for" required: true default: 
"sequence_model" + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true permissions: id-token: write contents: read @@ -33,4 +38,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'sequence_model' }} is_docker: false + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} secrets: inherit From 0a395c7f0195a6bcfdf2d923e7228f69d346dc2c Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Sun, 4 Jan 2026 09:31:37 -0700 Subject: [PATCH 16/26] Flip the flag to true for enable storage public access --- .github/workflows/platform_ci_workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/platform_ci_workflow.yml b/.github/workflows/platform_ci_workflow.yml index 761a5f2e..e72838b2 100644 --- a/.github/workflows/platform_ci_workflow.yml +++ b/.github/workflows/platform_ci_workflow.yml @@ -14,7 +14,7 @@ on: default: false enable_storage_public_access: type: boolean - default: false + default: true workflow_call: inputs: exec_environment: @@ -35,7 +35,7 @@ on: type: boolean description: "Temporarily enable storage public network access for this run" required: false - default: false + default: true # arm_client_id: # required: true # type: string From 6d7ff40d9b71b68fa39b3ef2920527f546c58aec Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Mon, 5 Jan 2026 06:16:40 -0700 Subject: [PATCH 17/26] Test pr-driven activation of the CI process --- .github/workflows/docker_taxi_ci_pipeline.yml | 2 +- .github/workflows/london_taxi_ci_pipeline.yml | 2 +- .github/workflows/sequence_model_ci_pipeline.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker_taxi_ci_pipeline.yml b/.github/workflows/docker_taxi_ci_pipeline.yml index 33a40d18..06cc4f58 100644 --- a/.github/workflows/docker_taxi_ci_pipeline.yml +++ 
b/.github/workflows/docker_taxi_ci_pipeline.yml @@ -65,5 +65,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'docker_taxi' }} is_docker: ${{ github.event_name == 'pull_request' && true || inputs.is_docker }} - enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + enable_storage_public_access: true secrets: inherit diff --git a/.github/workflows/london_taxi_ci_pipeline.yml b/.github/workflows/london_taxi_ci_pipeline.yml index a228f44c..82fa5c58 100644 --- a/.github/workflows/london_taxi_ci_pipeline.yml +++ b/.github/workflows/london_taxi_ci_pipeline.yml @@ -49,5 +49,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'london_taxi' }} is_docker: false - enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + enable_storage_public_access: true secrets: inherit \ No newline at end of file diff --git a/.github/workflows/sequence_model_ci_pipeline.yml b/.github/workflows/sequence_model_ci_pipeline.yml index 4ac02c65..18ff9798 100644 --- a/.github/workflows/sequence_model_ci_pipeline.yml +++ b/.github/workflows/sequence_model_ci_pipeline.yml @@ -38,5 +38,5 @@ jobs: exec_environment: ${{ inputs.exec_environment || 'pr' }} model_type: ${{ inputs.model_type || 'sequence_model' }} is_docker: false - enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + enable_storage_public_access: true secrets: inherit From 7ee2d26286fc512f329511ac3f188dc7a81f12b6 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Mon, 5 Jan 2026 06:37:09 -0700 Subject: [PATCH 18/26] Add cd tester to initiate all cd workflows when developing --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 13b0df63..79c01769 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ interrogate_deployment.ps1 .github/workflows/deploy_online_only.yml vars.env 
mlops/common/validate_storage_rbac.py +.github/workflows/test_all_cd.yml From e4d2d979d8f60fc3254d78a19f8119b2923693b8 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Mon, 5 Jan 2026 06:51:41 -0700 Subject: [PATCH 19/26] Add control over public network access storage settings to sequence model --- .../workflows/sequence_model_cd_pipeline.yml | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sequence_model_cd_pipeline.yml b/.github/workflows/sequence_model_cd_pipeline.yml index 2680aa84..6052a6e6 100644 --- a/.github/workflows/sequence_model_cd_pipeline.yml +++ b/.github/workflows/sequence_model_cd_pipeline.yml @@ -1,5 +1,16 @@ name: Sequence Model CD Workflow on: + workflow_dispatch: + inputs: + exec_environment: + type: string + default: "dev" + model_type: + type: string + default: "sequence_model" + enable_storage_public_access: + type: boolean + default: true push: branches: - main @@ -22,12 +33,19 @@ on: description: "The type of model to run the workflow for" required: true default: "sequence_model" + enable_storage_public_access: + type: boolean + description: "Temporarily enable storage public access for training" + required: false + default: true permissions: id-token: write contents: read jobs: - run-ci-workflow: + run-cd-workflow: uses: ./.github/workflows/platform_cd_workflow.yml with: exec_environment: ${{ inputs.exec_environment || 'dev' }} model_type: ${{ inputs.model_type || 'sequence_model' }} + enable_storage_public_access: ${{ inputs.enable_storage_public_access != false }} + secrets: inherit From 3d7713c7d338a60153cad475fb50830e0fb2924a Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Mon, 5 Jan 2026 06:53:18 -0700 Subject: [PATCH 20/26] Add public network storage access control to sequence model cd --- .github/workflows/sequence_model_cd_pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sequence_model_cd_pipeline.yml 
b/.github/workflows/sequence_model_cd_pipeline.yml index 6052a6e6..5e483e84 100644 --- a/.github/workflows/sequence_model_cd_pipeline.yml +++ b/.github/workflows/sequence_model_cd_pipeline.yml @@ -40,7 +40,7 @@ on: default: true permissions: id-token: write - contents: read + contents: read jobs: run-cd-workflow: uses: ./.github/workflows/platform_cd_workflow.yml From 024641c642ced7a9cbdcff6290cdd714f1073466 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Mon, 5 Jan 2026 07:13:14 -0700 Subject: [PATCH 21/26] Add feature branch to cause cd to run on push --- .github/workflows/docker_taxi_cd_pipeline.yml | 1 + .github/workflows/london_taxi_cd_pipeline.yml | 1 + .github/workflows/sequence_model_cd_pipeline.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/docker_taxi_cd_pipeline.yml b/.github/workflows/docker_taxi_cd_pipeline.yml index 3463e42d..8fd0bc50 100644 --- a/.github/workflows/docker_taxi_cd_pipeline.yml +++ b/.github/workflows/docker_taxi_cd_pipeline.yml @@ -15,6 +15,7 @@ on: push: branches: - main + - lorrin/20260103-fix-platform-cd-env paths: - mlops/common/** - mlops/docker_taxi/** diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index db04822f..464680d8 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -14,6 +14,7 @@ on: push: branches: - main + - lorrin/20260103-fix-platform-cd-env paths: - mlops/common/** - mlops/london_taxi/** diff --git a/.github/workflows/sequence_model_cd_pipeline.yml b/.github/workflows/sequence_model_cd_pipeline.yml index 5e483e84..593fb6b5 100644 --- a/.github/workflows/sequence_model_cd_pipeline.yml +++ b/.github/workflows/sequence_model_cd_pipeline.yml @@ -14,6 +14,7 @@ on: push: branches: - main + - lorrin/20260103-fix-platform-cd-env paths: - mlops/common/** - mlops/** From b8c10d05cdd8c4287ab4df3c2509f12c101301af Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: 
Mon, 5 Jan 2026 13:48:00 -0700 Subject: [PATCH 22/26] Test cd the models on push to feature branch temporarily. --- .github/workflows/docker_taxi_cd_pipeline.yml | 1 + .github/workflows/london_taxi_cd_pipeline.yml | 1 + .github/workflows/sequence_model_cd_pipeline.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/docker_taxi_cd_pipeline.yml b/.github/workflows/docker_taxi_cd_pipeline.yml index 8fd0bc50..99d0ec99 100644 --- a/.github/workflows/docker_taxi_cd_pipeline.yml +++ b/.github/workflows/docker_taxi_cd_pipeline.yml @@ -17,6 +17,7 @@ on: - main - lorrin/20260103-fix-platform-cd-env paths: + - .github/workflows/docker_taxi_cd_pipeline.yml - mlops/common/** - mlops/docker_taxi/** - model/docker_taxi/** diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index 464680d8..fe849728 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -16,6 +16,7 @@ on: - main - lorrin/20260103-fix-platform-cd-env paths: + - .github/workflows/london_taxi_cd_pipeline.yml - mlops/common/** - mlops/london_taxi/** - model/london_taxi/** diff --git a/.github/workflows/sequence_model_cd_pipeline.yml b/.github/workflows/sequence_model_cd_pipeline.yml index 593fb6b5..2357a713 100644 --- a/.github/workflows/sequence_model_cd_pipeline.yml +++ b/.github/workflows/sequence_model_cd_pipeline.yml @@ -16,6 +16,7 @@ on: - main - lorrin/20260103-fix-platform-cd-env paths: + - .github/workflows/sequence_model_cd_pipeline.yml - mlops/common/** - mlops/** - mlops/sequence_model/** From fbb30ff8837d558806a63f2f45e86dbdc3749b6c Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Tue, 6 Jan 2026 06:50:34 -0700 Subject: [PATCH 23/26] Reverse workflow_dispatch, on push branch, and add documentation of the enable_storage_public_network flag --- .github/workflows/docker_taxi_cd_pipeline.yml | 2 - .github/workflows/docker_taxi_ci_pipeline.yml | 22 ------- 
.github/workflows/london_taxi_cd_pipeline.yml | 4 +- .github/workflows/london_taxi_ci_pipeline.yml | 11 ---- .../workflows/sequence_model_cd_pipeline.yml | 3 - docs/how-to/InfrastructureDesign.md | 62 ++++++++++++++++++- 6 files changed, 62 insertions(+), 42 deletions(-) diff --git a/.github/workflows/docker_taxi_cd_pipeline.yml b/.github/workflows/docker_taxi_cd_pipeline.yml index 99d0ec99..3463e42d 100644 --- a/.github/workflows/docker_taxi_cd_pipeline.yml +++ b/.github/workflows/docker_taxi_cd_pipeline.yml @@ -15,9 +15,7 @@ on: push: branches: - main - - lorrin/20260103-fix-platform-cd-env paths: - - .github/workflows/docker_taxi_cd_pipeline.yml - mlops/common/** - mlops/docker_taxi/** - model/docker_taxi/** diff --git a/.github/workflows/docker_taxi_ci_pipeline.yml b/.github/workflows/docker_taxi_ci_pipeline.yml index 06cc4f58..ab8d68f3 100644 --- a/.github/workflows/docker_taxi_ci_pipeline.yml +++ b/.github/workflows/docker_taxi_ci_pipeline.yml @@ -11,28 +11,6 @@ on: - 'model/docker_taxi/**' - 'src/docker_taxi_src/**' - 'test/docker_taxi/**' - workflow_dispatch: - inputs: - exec_environment: - type: string - description: "The environment to run the workflow in" - required: true - default: "pr" - model_type: - type: string - description: "The type of model to run the workflow for" - required: true - default: "docker_taxi" - is_docker: - type: boolean - description: "Is Docker used for build validation?" 
- required: true - default: true - enable_storage_public_access: - type: boolean - description: "Temporarily enable storage public access for training" - required: false - default: true workflow_call: inputs: exec_environment: diff --git a/.github/workflows/london_taxi_cd_pipeline.yml b/.github/workflows/london_taxi_cd_pipeline.yml index fe849728..ed22adbf 100644 --- a/.github/workflows/london_taxi_cd_pipeline.yml +++ b/.github/workflows/london_taxi_cd_pipeline.yml @@ -10,13 +10,11 @@ on: default: "london_taxi" enable_storage_public_access: type: boolean - default: true # ← ADD THIS (true to enable by default) + default: true push: branches: - main - - lorrin/20260103-fix-platform-cd-env paths: - - .github/workflows/london_taxi_cd_pipeline.yml - mlops/common/** - mlops/london_taxi/** - model/london_taxi/** diff --git a/.github/workflows/london_taxi_ci_pipeline.yml b/.github/workflows/london_taxi_ci_pipeline.yml index 82fa5c58..bea07470 100644 --- a/.github/workflows/london_taxi_ci_pipeline.yml +++ b/.github/workflows/london_taxi_ci_pipeline.yml @@ -1,17 +1,6 @@ name: London Taxi CI Workflow on: - workflow_dispatch: - inputs: - exec_environment: - type: string - default: "pr" - model_type: - type: string - default: "london_taxi" - enable_storage_public_access: - type: boolean - default: true pull_request: branches: - main diff --git a/.github/workflows/sequence_model_cd_pipeline.yml b/.github/workflows/sequence_model_cd_pipeline.yml index 2357a713..83885ee4 100644 --- a/.github/workflows/sequence_model_cd_pipeline.yml +++ b/.github/workflows/sequence_model_cd_pipeline.yml @@ -14,11 +14,8 @@ on: push: branches: - main - - lorrin/20260103-fix-platform-cd-env paths: - - .github/workflows/sequence_model_cd_pipeline.yml - mlops/common/** - - mlops/** - mlops/sequence_model/** - model/sequence_model/** - src/sequence_model/** diff --git a/docs/how-to/InfrastructureDesign.md b/docs/how-to/InfrastructureDesign.md index f1034332..bf13b70a 100644 --- 
a/docs/how-to/InfrastructureDesign.md +++ b/docs/how-to/InfrastructureDesign.md @@ -3,6 +3,66 @@ For teams who are starting with MLOps, we suggest to have at least two Azure Machine Learning instances. ![Dev and Prod](../media/devprd.png) For teams with more familiarity with MLOps and Azure, we recommend to have three environments. ![Dev, Test and Prod](../media/devtestprd.png) -**Note**: In the current version of Model Factory, the infrastructure is provisioned in a public network configuration. Support for provisioning the infrastructure in a Private networking configuration is forthcoming in a future release. +## Storage Network Access Configuration + +### Storage Security Baseline + +The Model Factory provisions Azure Storage with a security-first baseline: +- **Public network access**: Disabled +- **Default network action**: Deny +- **Shared key access**: Disabled (policy-enforced) +- **Access control**: Azure AD authentication with RBAC + +This configuration ensures storage is not exposed to the public internet and all access requires proper identity and role-based permissions. + +### Temporary Public Access for Development + +During development, training jobs and deployment endpoints need to download models and data from storage. In non-production environments (PR validation and dev testing), the workflows can temporarily enable public storage access for the duration of the job, then restore security restrictions afterward. 
+ +This pattern is controlled by the `enable_storage_public_access` workflow parameter: + +**CI Workflows** (`*_ci_pipeline.yml`): +- Always use `enable_storage_public_access: true` (hardcoded) +- Acceptable for PR/dev environments where temporary public access trades security for simplicity and cost optimization +- Public access is enabled before training, then restored to restricted mode after job completion + +**CD Workflows** (`*_cd_pipeline.yml`): +- `workflow_call` (orchestrated): defaults to `true` in model workflows, can be overridden by orchestrator +- `push` to main: uses `true` via conditional logic for automated dev deployments + +**Stage/Production Deployments**: +- Set `enable_storage_public_access: false` when calling CD workflows for production environments +- Requires [private endpoints](https://learn.microsoft.com/en-us/azure/storage/common/storage-private-endpoints) or [service endpoints](https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-service-endpoints-overview) configured for storage +- Requires [resource access rules](https://learn.microsoft.com/en-us/azure/storage/common/storage-network-security?tabs=azure-portal#grant-access-from-azure-resource-instances) to allow Azure ML workspace and compute to access storage +- Endpoint managed identities must have `Storage Blob Data Reader` role assigned + +### Workflow Examples + +**Manual dev deployment with temporary public access:** +```bash +gh workflow run london_taxi_cd_pipeline.yml \ + -f exec_environment=dev \ + -f enable_storage_public_access=true +``` + +**Orchestrated production deployment with private networking:** +```bash +gh workflow run test_all_cd.yml \ + -f exec_environment=prod \ + -f enable_storage_public_access=false +``` + +### Network Configuration Timeline + +**Current (v2.x)**: +- Infrastructure provisioned with public network configuration +- Temporary public access pattern available via `enable_storage_public_access` parameter +- Production deployments 
should set parameter to `false` and configure private/service endpoints manually + +**Future**: +- Full private networking configuration will be provisioned via IaC +- Service endpoints or private endpoints created automatically +- Resource access rules configured during infrastructure provisioning +- `enable_storage_public_access: false` will work out-of-box for all environments If you want to learn more about best practices, you can visit [Azure CloudFramework Best Practices](https://docs.microsoft.com/en-us/azure/cloud-adoption-framework/ready/azure-best-practices/ai-machine-learning-resource-organization) From e3b5edf1ea71d7f48b224f022d97c7d6815f281f Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Tue, 6 Jan 2026 07:22:08 -0700 Subject: [PATCH 24/26] Implement environment-based variable resolution and improve CD workflow robustness --- .github/workflows/platform_cd_workflow.yml | 98 +--------------------- .github/workflows/platform_ci_workflow.yml | 15 +--- 2 files changed, 3 insertions(+), 110 deletions(-) diff --git a/.github/workflows/platform_cd_workflow.yml b/.github/workflows/platform_cd_workflow.yml index a0ec1b4e..0b0313ef 100644 --- a/.github/workflows/platform_cd_workflow.yml +++ b/.github/workflows/platform_cd_workflow.yml @@ -48,6 +48,7 @@ jobs: execute-training-job: name: Execute training job runs-on: ubuntu-latest + environment: ${{ inputs.exec_environment }} steps: - name: Checkout uses: actions/checkout@v4 @@ -141,103 +142,6 @@ jobs: echo "No common directory found for ${{ inputs.model_type }}, skipping copy" fi - # - name: Validate Storage Configuration (RBAC smoke tests) - # run: | - # echo "=== Validating Storage Configuration and AD-auth model download ===" - - # # Print storage configuration for diagnostics - # az storage account show \ - # --name ${{ env.STORAGE_ACCOUNT_NAME }} \ - # --resource-group ${{ env.RESOURCE_GROUP_NAME }} \ - # --subscription ${{ env.SUBSCRIPTION_ID }} \ - # --query '{name:name, 
defaultAction:networkRuleSet.defaultAction, allowSharedKeyAccess:allowSharedKeyAccess, publicNetworkAccess:publicNetworkAccess}' -o json - - # echo "\nTesting blob access with workflow identity (auth-mode login)..." - # if ! az storage container list \ - # --account-name ${{ env.STORAGE_ACCOUNT_NAME }} \ - # --auth-mode login \ - # --subscription ${{ env.SUBSCRIPTION_ID }} \ - # --output table; then - # echo "❌ ERROR: Workflow identity cannot access storage - this indicates RBAC or network issues" - # exit 1 - # fi - - # echo "\nDeriving published model name for smoke download test..." - # PUBLISHED_MODEL_NAME=$(python - </dev/null | python - < Date: Tue, 6 Jan 2026 07:29:54 -0700 Subject: [PATCH 25/26] feat: implement environment-based variable resolution and improve CD workflow robustness From 0e0aecca9fc5bb449f11f7bc01fd037d894a8751 Mon Sep 17 00:00:00 2001 From: "Ferdinand, Lorrin" Date: Tue, 6 Jan 2026 08:39:30 -0700 Subject: [PATCH 26/26] feat: implement environment-based variable resolution and improve CD workflow robustness