Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

work in progress #6942

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft

work in progress #6942

wants to merge 2 commits into main

Conversation

kwindau
Copy link
Contributor

@kwindau kwindau commented Jan 31, 2025

work in progress

@dataops-ci-bot
Copy link

Integration report for "Fix schema.yaml - remove trailing space in column desc"

sql.diff

Click to expand!
Only in /tmp/workspace/generated-sql/dags/: bqetl_daily_retention.py
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_core.py /tmp/workspace/generated-sql/dags/bqetl_core.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_core.py	2025-01-31 22:09:30.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_core.py	2025-01-31 22:10:24.000000000 +0000
@@ -118,6 +118,13 @@
         )
 
         ExternalTaskMarker(
+            task_id="bqetl_daily_retention__wait_for_telemetry_derived__core_clients_last_seen__v1",
+            external_dag_id="bqetl_daily_retention",
+            external_task_id="wait_for_telemetry_derived__core_clients_last_seen__v1",
+            execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=22800)).isoformat() }}",
+        )
+
+        ExternalTaskMarker(
             task_id="bqetl_gud__wait_for_telemetry_derived__core_clients_last_seen__v1",
             external_dag_id="bqetl_gud",
             external_task_id="wait_for_telemetry_derived__core_clients_last_seen__v1",
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_daily_retention.py /tmp/workspace/generated-sql/dags/bqetl_daily_retention.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_daily_retention.py	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_daily_retention.py	2025-01-31 22:10:29.000000000 +0000
@@ -0,0 +1,334 @@
+# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
+
+from airflow import DAG
+from airflow.sensors.external_task import ExternalTaskMarker
+from airflow.sensors.external_task import ExternalTaskSensor
+from airflow.utils.task_group import TaskGroup
+import datetime
+from operators.gcp_container_operator import GKEPodOperator
+from utils.constants import ALLOWED_STATES, FAILED_STATES
+from utils.gcp import bigquery_etl_query, bigquery_dq_check, bigquery_bigeye_check
+
+docs = """
+### bqetl_daily_retention
+
+Built from bigquery-etl repo, [`dags/bqetl_daily_retention.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_daily_retention.py)
+
+#### Description
+
+Schedules daily level retention queries
+#### Owner
+
+kwindau@mozilla.com
+
+#### Tags
+
+* impact/tier_2
+* repo/bigquery-etl
+"""
+
+
+default_args = {
+    "owner": "kwindau@mozilla.com",
+    "start_date": datetime.datetime(2025, 2, 5, 0, 0),
+    "end_date": None,
+    "email": ["telemetry-alerts@mozilla.com", "kwindau@mozilla.com"],
+    "depends_on_past": False,
+    "retry_delay": datetime.timedelta(seconds=1800),
+    "email_on_failure": True,
+    "email_on_retry": False,
+    "retries": 2,
+}
+
+tags = ["impact/tier_2", "repo/bigquery-etl"]
+
+with DAG(
+    "bqetl_daily_retention",
+    default_args=default_args,
+    schedule_interval="40 19 * * *",
+    doc_md=docs,
+    tags=tags,
+) as dag:
+
+    wait_for_bigeye__org_mozilla_fenix_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_fenix_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="fenix.bigeye__org_mozilla_fenix_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="fenix.bigeye__org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="fenix.bigeye__org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="fenix.bigeye__org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_firefox_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_firefox_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="fenix.bigeye__org_mozilla_firefox_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="focus_android.bigeye__org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_focus_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_focus_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="focus_android.bigeye__org_mozilla_focus_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="focus_android.bigeye__org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="firefox_ios.bigeye__org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="firefox_ios.bigeye__org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="firefox_ios.bigeye__org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="focus_ios.bigeye__org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="klar_ios.bigeye__org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_bigeye__org_mozilla_klar_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_bigeye__org_mozilla_klar_derived__baseline_clients_last_seen__v1",
+        external_dag_id="bqetl_glean_usage",
+        external_task_id="klar_android.bigeye__org_mozilla_klar_derived__baseline_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor(
+        task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2",
+        external_dag_id="bqetl_main_summary",
+        external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    wait_for_telemetry_derived__core_clients_last_seen__v1 = ExternalTaskSensor(
+        task_id="wait_for_telemetry_derived__core_clients_last_seen__v1",
+        external_dag_id="bqetl_core",
+        external_task_id="telemetry_derived__core_clients_last_seen__v1",
+        execution_delta=datetime.timedelta(seconds=63600),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
+    telemetry_derived__rolling_cohorts__v2 = bigquery_etl_query(
+        task_id="telemetry_derived__rolling_cohorts__v2",
+        destination_table="rolling_cohorts_v2",
+        dataset_id="telemetry_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="kwindau@mozilla.com",
+        email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_fenix_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_firefox_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_focus_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_bigeye__org_mozilla_klar_derived__baseline_clients_last_seen__v1
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_checks__fail_telemetry_derived__clients_last_seen__v2
+    )
+
+    telemetry_derived__rolling_cohorts__v2.set_upstream(
+        wait_for_telemetry_derived__core_clients_last_seen__v1
+    )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_default.py /tmp/workspace/generated-sql/dags/bqetl_default.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_default.py	2025-01-31 22:09:30.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_default.py	2025-01-31 22:10:26.000000000 +0000
@@ -62,3 +62,15 @@
         date_partition_parameter="submission_date",
         depends_on_past=False,
     )
+
+    telemetry_derived__cohort_daily_statistics__v2 = bigquery_etl_query(
+        #### WARNING: This task has been scheduled in the default DAG. It can be moved to a more suitable DAG using `bqetl query schedule`.
+        task_id="telemetry_derived__cohort_daily_statistics__v2",
+        destination_table="cohort_daily_statistics_v2",
+        dataset_id="telemetry_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="example@mozilla.com",
+        email=["example@mozilla.com", "telemetry-alerts@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py	2025-01-31 22:09:30.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py	2025-01-31 22:10:24.000000000 +0000
@@ -209,6 +209,13 @@
         )
 
         ExternalTaskMarker(
+            task_id="bqetl_daily_retention__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2",
+            external_dag_id="bqetl_daily_retention",
+            external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2",
+            execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=22800)).isoformat() }}",
+        )
+
+        ExternalTaskMarker(
             task_id="bqetl_gud__wait_for_checks__fail_telemetry_derived__clients_last_seen__v2",
             external_dag_id="bqetl_gud",
             external_task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2",
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived: cohort_daily_statistics_v2
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived: rolling_cohorts_v2
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/metadata.yaml	2025-01-31 22:05:41.000000000 +0000
@@ -0,0 +1,25 @@
+friendly_name: Cohort Daily Statistics
+description: |-
+  Please provide a description for the query
+owners:
+- example@mozilla.com
+labels:
+  incremental: true
+  owner1: example
+  dag: bqetl_default
+scheduling:
+  dag_name: bqetl_default
+bigquery:
+  time_partitioning:
+    type: day
+    field: ''
+    require_partition_filter: true
+    expiration_days: null
+  range_partitioning: null
+  clustering:
+    fields: []
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/query.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/query.sql	2025-01-31 22:02:28.000000000 +0000
@@ -0,0 +1 @@
+--TO DO
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/schema.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/cohort_daily_statistics_v2/schema.yaml	2025-01-31 22:02:28.000000000 +0000
@@ -0,0 +1,97 @@
+fields:
+- mode: NULLABLE
+  name: cohort_date
+  type: DATE
+  description: Cohort Date
+- mode: NULLABLE
+  name: activity_date
+  type: DATE
+  description: Activity Date
+- mode: NULLABLE
+  name: activity_segment
+  type: STRING
+  description: Activity Segment
+- mode: NULLABLE
+  name: app_version
+  type: STRING
+  description: App Version
+- mode: NULLABLE
+  name: attribution_campaign
+  type: STRING
+  description: Attribution Campaign
+- mode: NULLABLE
+  name: attribution_content
+  type: STRING
+  description: Attribution Content
+- mode: NULLABLE
+  name: attribution_experiment
+  type: STRING
+  description: Attribution Experiment
+- mode: NULLABLE
+  name: attribution_medium
+  type: STRING
+  description: Attribution Medium
+- mode: NULLABLE
+  name: attribution_source
+  type: STRING
+  description: Attribution Source
+- mode: NULLABLE
+  name: attribution_variation
+  type: STRING
+  description: Attribution Variation
+- mode: NULLABLE
+  name: city
+  type: STRING
+  description: City
+- mode: NULLABLE
+  name: country
+  type: STRING
+  description: Country
+- mode: NULLABLE
+  name: device_model
+  type: STRING
+  description: Device Model
+- mode: NULLABLE
+  name: distribution_id
+  type: STRING
+  description: Distribution ID
+- mode: NULLABLE
+  name: is_default_browser
+  type: BOOLEAN
+  description: Is Default Browser
+- mode: NULLABLE
+  name: locale
+  type: STRING
+  description: Locale
+- mode: NULLABLE
+  name: normalized_app_name
+  type: STRING
+  description: Normalized App Name
+- mode: NULLABLE
+  name: normalized_channel
+  type: STRING
+  description: Normalized Channel
+- mode: NULLABLE
+  name: normalized_os
+  type: STRING
+  description: Normalized Operating System
+- mode: NULLABLE
+  name: normalized_os_version
+  type: STRING
+  description: Normalized Operating System Version
+- mode: NULLABLE
+  name: os_version_major
+  type: INTEGER
+  description: Operating System Major Version
+- mode: NULLABLE
+  name: os_version_minor
+  type: INTEGER
+  description: Operating System Minor Version
+- mode: NULLABLE
+  name: num_clients_in_cohort
+  type: INTEGER
+  description: Number of Clients in Cohort
+- mode: NULLABLE
+  name: num_clients_active_on_day
+  type: INTEGER
+  description: Number of Clients Active on Day
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/metadata.yaml	2025-01-31 22:05:41.000000000 +0000
@@ -0,0 +1,35 @@
+friendly_name: Rolling Cohorts
+description: |-
+  Rolling Cohorts consists of one row per client per date (each date
+  representing a new cohort) i.e. all the clients that had their first
+  ping sent that day. This can be left joined with various activity
+  tables to calculate activity metrics for particular cohorts.
+
+  Note that client-attributes such as os versions, is default browser,
+  etc. are based on the values at the time the cohort is created.
+  This might not be the same as when activity happens. For example
+  if a client changes whether Firefox is the default browser.
+owners:
+- kwindau@mozilla.com
+labels:
+  incremental: true
+  owner1: kwindau
+  table_type: client_level
+  dag: bqetl_daily_retention
+scheduling:
+  dag_name: bqetl_daily_retention
+bigquery:
+  time_partitioning:
+    type: day
+    field: cohort_date
+    require_partition_filter: true
+    expiration_days: 775.0
+  range_partitioning: null
+  clustering:
+    fields:
+    - normalized_channel
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/query.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/query.sql	2025-01-31 22:02:28.000000000 +0000
@@ -0,0 +1,63 @@
+--Desktop
+SELECT
+  au.client_id,
+  au.first_seen_date AS cohort_date,
+  au.activity_segment,
+  au.app_version,
+  CAST(NULL AS string) AS attribution_campaign, --FIX
+  CAST(NULL AS string) AS attribution_content, --FIX
+  CAST(NULL AS string) AS attribution_experiment, --FIX
+  au.attribution_medium,
+  au.attribution_source,
+  CAST(NULL AS string) AS attribution_variation, --FIX
+  au.city,
+  au.country,
+  ccls.device AS device_model,
+  au.distribution_id,
+  au.is_default_browser,
+  au.locale,
+  au.app_name AS normalized_app_name,
+  au.normalized_channel,
+  au.os AS normalized_os, --old one had it as normalized_os, do I need to add a transform of some kind to normalize?
+  au.normalized_os_version,
+  CAST(NULL AS INTEGER) AS os_version_major, --FIX
+  CAST(NULL AS INTEGER) AS os_version_minor, --FIX
+FROM
+  `moz-fx-data-shared-prod.telemetry.desktop_active_users` au
+LEFT JOIN
+  `moz-fx-data-shared-prod.telemetry.core_clients_last_seen` ccls
+  ON au.client_id = ccls.client_id
+  AND au.submission_date = ccls.submission_date
+WHERE
+  au.first_seen_date = @submission_date
+  AND au.submission_date = @submission_date
+UNION ALL
+--Mobile
+SELECT
+  au.client_id,
+  au.first_seen_date AS cohort_date,
+  au.activity_segment,
+  CAST(NULL AS STRING) AS app_version, --FIX
+  CAST(NULL AS string) AS attribution_campaign, --FIX
+  CAST(NULL AS string) AS attribution_content, --FIX
+  CAST(NULL AS string) AS attribution_experiment, --FIX
+  CAST(NULL AS string) AS attribution_medium, --FIX
+  CAST(NULL AS string) AS attribution_source, --FIX
+  CAST(NULL AS string) AS attribution_variation, --FIX
+  au.city,
+  au.country,
+  au.device_model,
+  au.distribution_id,
+  CAST(NULL AS BOOLEAN) AS is_default_browser,
+  au.locale,
+  CAST(NULL AS STRING) AS normalized_app_name,--FIX
+  au.normalized_channel,
+  CAST(NULL AS STRING) AS normalized_os, --FIX
+  CAST(NULL AS STRING) AS normalized_os_version, --FIX
+  CAST(NULL AS INTEGER) AS os_version_major, --FIX
+  CAST(NULL AS INTEGER) AS os_version_minor, --FIX
+FROM
+  `moz-fx-data-shared-prod.telemetry.mobile_active_users` au
+WHERE
+  au.first_seen_date = @submission_date
+  AND au.submission_date = @submission_date
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/schema.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/rolling_cohorts_v2/schema.yaml	2025-01-31 22:02:28.000000000 +0000
@@ -0,0 +1,89 @@
+fields:
+- mode: NULLABLE
+  name: client_id
+  type: STRING
+  description: Client ID
+- mode: NULLABLE
+  name: cohort_date
+  type: DATE
+  description: Cohort Date - First Seen Date
+- mode: NULLABLE
+  name: activity_segment
+  type: STRING
+  description: Activity Segment
+- mode: NULLABLE
+  name: app_version
+  type: STRING
+  description: App Version
+- mode: NULLABLE
+  name: attribution_campaign
+  type: STRING
+  description: Attribution Campaign
+- mode: NULLABLE
+  name: attribution_content
+  type: STRING
+  description: Attribution Content
+- mode: NULLABLE
+  name: attribution_experiment
+  type: STRING
+  description: Attribution Experiment
+- mode: NULLABLE
+  name: attribution_medium
+  type: STRING
+  description: Attribution Medium
+- mode: NULLABLE
+  name: attribution_source
+  type: STRING
+  description: Attribution Source
+- mode: NULLABLE
+  name: attribution_variation
+  type: STRING
+  description: Attribution Variation
+- mode: NULLABLE
+  name: city
+  type: STRING
+  description: City
+- mode: NULLABLE
+  name: country
+  type: STRING
+  description: Country
+- mode: NULLABLE
+  name: device_model
+  type: STRING
+  description: Device Model
+- mode: NULLABLE
+  name: distribution_id
+  type: STRING
+  description: Distribution ID
+- mode: NULLABLE
+  name: is_default_browser
+  type: BOOLEAN
+  description: Is Default Browser
+- mode: NULLABLE
+  name: locale
+  type: STRING
+  description: Locale
+- mode: NULLABLE
+  name: normalized_app_name
+  type: STRING
+  description: Normalized app Name
+- mode: NULLABLE
+  name: normalized_channel
+  type: STRING
+  description: Normalized Channel
+- mode: NULLABLE
+  name: normalized_os
+  type: STRING
+  description: Normalized Operating System
+- mode: NULLABLE
+  name: normalized_os_version
+  type: STRING
+  description: Normalized Operating System Version
+- mode: NULLABLE
+  name: os_version_major
+  type: INTEGER
+  description: Operating System - Major Version
+- mode: NULLABLE
+  name: os_version_minor
+  type: INTEGER
+  description: Operating System - Minor Version

Link to full diff

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants