From 1555c00cd07a85d83c2dc8b96597b1d542882239 Mon Sep 17 00:00:00 2001 From: Charles Costanzo Date: Mon, 22 May 2023 15:49:17 -0400 Subject: [PATCH] chose to break out the weekly full refresh into its own DAG so that the original DAG is preserved if it needs to be used --- .../METADATA.yml | 2 +- .../METADATA.yml | 19 +++++ .../dbt_run_and_upload_artifacts.yml | 77 +++++++++++++++++++ .../dbt_test.yml | 61 +++++++++++++++ 4 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 airflow/dags/transform_warehouse_full_refresh_exclude_rt/METADATA.yml create mode 100644 airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_run_and_upload_artifacts.yml create mode 100644 airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_test.yml diff --git a/airflow/dags/transform_warehouse_full_refresh/METADATA.yml b/airflow/dags/transform_warehouse_full_refresh/METADATA.yml index ef62fa7f2a..266b2e26c5 100644 --- a/airflow/dags/transform_warehouse_full_refresh/METADATA.yml +++ b/airflow/dags/transform_warehouse_full_refresh/METADATA.yml @@ -1,5 +1,5 @@ description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models." -schedule_interval: "0 12 * * 0" +schedule_interval: Null tags: - all_gusty_features default_args: diff --git a/airflow/dags/transform_warehouse_full_refresh_exclude_rt/METADATA.yml b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/METADATA.yml new file mode 100644 index 0000000000..55600bf1f2 --- /dev/null +++ b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/METADATA.yml @@ -0,0 +1,19 @@ +description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models." 
+schedule_interval: "0 12 * * 0" +tags: + - all_gusty_features +default_args: + owner: airflow + depends_on_past: False + start_date: !days_ago 1 + email: + - "andrew.v@jarv.us" + - "jameelah.y@jarv.us" + - "laurie.m@jarv.us" + email_on_failure: True + email_on_retry: False + retries: 0 + retry_delay: !timedelta 'minutes: 2' + concurrency: 50 + #sla: !timedelta 'hours: 2' +latest_only: True diff --git a/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_run_and_upload_artifacts.yml b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_run_and_upload_artifacts.yml new file mode 100644 index 0000000000..11daf20957 --- /dev/null +++ b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_run_and_upload_artifacts.yml @@ -0,0 +1,77 @@ +operator: 'operators.PodOperator' +name: 'dbt-run-and-upload-artifacts' +image: 'ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}' + +cmds: + - python3 +arguments: + - '/app/scripts/run_and_upload.py' + - 'run' + - '--full-refresh' + - '--dbt-docs' + - '--save-artifacts' + - '--deploy-docs' + - '--sync-metabase' + - '--select' + - "{{ dag_run.conf.get('dbt_select_statement', '') }}" + - '--exclude' + - 'gtfs_rt_external_tables+' + +is_delete_operator_pod: true +get_logs: true +is_gke: true +pod_location: us-west1 +cluster_name: data-infra-apps +namespace: airflow-jobs +priority_class_name: dbt-high-priority + +env_vars: + AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}" + CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}" + BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json + DBT_PROJECT_DIR: /app + DBT_PROFILE_DIR: /app + DBT_DATABASE: "{{ get_project_id() }}" + DBT_TARGET: "{{ env_var('DBT_TARGET') }}" + MB_HOST: dashboards.calitp.org + NETLIFY_SITE_ID: cal-itp-dbt-docs + SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}" + SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}" + +secrets: + - deploy_type: volume + deploy_target: /secrets/jobs-data/ + secret: jobs-data + 
key: service-account.json + - deploy_type: env + deploy_target: MB_USER + secret: jobs-data + key: metabase-user + - deploy_type: env + deploy_target: MB_PASSWORD + secret: jobs-data + key: metabase-password + - deploy_type: env + deploy_target: NETLIFY_AUTH_TOKEN + secret: jobs-data + key: netlify-auth-token + +k8s_resources: + request_memory: 2.0Gi + request_cpu: 1 + +tolerations: + - key: pod-role + operator: Equal + value: computetask + effect: NoSchedule + +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: pod-role + operator: In + values: + - computetask diff --git a/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_test.yml b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_test.yml new file mode 100644 index 0000000000..5563976121 --- /dev/null +++ b/airflow/dags/transform_warehouse_full_refresh_exclude_rt/dbt_test.yml @@ -0,0 +1,61 @@ +operator: 'operators.PodOperator' +name: 'dbt-test' +image: 'ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}' + +cmds: + - python3 +arguments: + - '/app/scripts/run_and_upload.py' + - 'run' + - '--no-dbt-seed' + - '--no-dbt-run' + - '--dbt-test' + - '--select' + - "{{ dag_run.conf.get('dbt_select_statement', '') }}" + +dependencies: + - dbt_run_and_upload_artifacts +trigger_rule: all_done + +is_delete_operator_pod: true +get_logs: true +is_gke: true +pod_location: us-west1 +cluster_name: data-infra-apps +namespace: airflow-jobs + +env_vars: + AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}" + CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}" + BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json + DBT_PROJECT_DIR: /app + DBT_PROFILE_DIR: /app + DBT_TARGET: "{{ env_var('DBT_TARGET') }}" + SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}" + SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}" + +secrets: + - deploy_type: volume + deploy_target: /secrets/jobs-data/ + secret: jobs-data + 
key: service-account.json + +k8s_resources: + request_memory: 2.0Gi + request_cpu: 1 + +tolerations: + - key: pod-role + operator: Equal + value: computetask + effect: NoSchedule + +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: pod-role + operator: In + values: + - computetask