From b81a7a09bb8a9c1c41f3961640622190c7041da1 Mon Sep 17 00:00:00 2001 From: kik-kik <42538694+kik-kik@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:37:26 +0100 Subject: [PATCH] feat(DENG-6890): Update mobile kpi support metric aggregates to support dma analysis (#6783) * feat: add device_type and device_manufacturer to KPI support metrics datasets * feat: ensure all mobile kpi support metric aggregated datasets have device_type and device_manufacturer dimensions * feat: add device_type and device_manufacturer to the mobile kpi support metric aggregate queries * feat: tweak active_users view device_type value setting logic * feat: lower case device_manufacturer for some cases where the same manufacturer value has different casing * feat: bucket device_manufacturers into "other" bucket when they have a lower occurrence, to prevent the aggregate tables from exploding * fix: retention_clients some selection fields being ambiguous * feat: Use dense rank instead of count for bucketing all device_manufacturers into other bucket if they are rank 151 or more * fix: using incorrect source table for ranking device_manufacturers * feat: make sure that device_manufacturer is lowered prior to joining --- .../templates/active_users.view.sql | 15 +++++++++- .../templates/engagement.query.sql | 28 +++++++++++++++++ .../templates/engagement.schema.yaml | 12 ++++++++ .../templates/engagement_clients.view.sql | 4 +++ .../templates/new_profile_clients.view.sql | 1 + .../templates/new_profiles.query.sql | 28 ++++++++++++++++- .../templates/new_profiles.schema.yaml | 6 ++++ .../templates/retention.query.sql | 30 +++++++++++++++++++ .../templates/retention.schema.yaml | 12 ++++++++ .../templates/retention_clients.view.sql | 4 +++ 10 files changed, 138 insertions(+), 2 deletions(-) diff --git a/sql_generators/mobile_kpi_support_metrics/templates/active_users.view.sql b/sql_generators/mobile_kpi_support_metrics/templates/active_users.view.sql index 30e8698e199..fb491b08cbc 100644 
--- a/sql_generators/mobile_kpi_support_metrics/templates/active_users.view.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/active_users.view.sql @@ -3,7 +3,10 @@ CREATE OR REPLACE VIEW `{{ project_id }}.{{ dataset }}.{{ name }}` AS SELECT - * EXCEPT (isp), + * EXCEPT (isp) REPLACE( + -- Lower device_manufacturer as in some cases the same manufacturer value has different casing. + LOWER(device_manufacturer) AS device_manufacturer + ), CASE WHEN LOWER(isp) = "browserstack" THEN CONCAT("{{ friendly_name }}", " ", isp) @@ -47,5 +50,15 @@ SELECT -- Adding isp at the end because it's in different column index in baseline table for some products. -- This is to make sure downstream union works as intended. isp, + CASE + WHEN normalized_os = "iOS" AND STARTS_WITH(device_model, "iPad") + THEN "iPad" + WHEN normalized_os = "iOS" AND STARTS_WITH(device_model, "iPhone") + THEN "iPhone" + WHEN normalized_os = "Android" + THEN "Android" + ELSE + CAST(NULL AS STRING) + END AS device_type, FROM `{{ project_id }}.{{ dataset }}.baseline_clients_last_seen` diff --git a/sql_generators/mobile_kpi_support_metrics/templates/engagement.query.sql b/sql_generators/mobile_kpi_support_metrics/templates/engagement.query.sql index 316c00dd2cb..18fd79d8424 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/engagement.query.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/engagement.query.sql @@ -1,4 +1,24 @@ {{ header }} +WITH device_manufacturer_counts AS ( + SELECT + submission_date, + device_manufacturer, + RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank, + FROM + `{{ project_id }}.{{ dataset }}.engagement_clients` + WHERE + {% raw %} + {% if is_init() %} + submission_date < CURRENT_DATE + {% else %} + submission_date = @submission_date + {% endif %} + {% endraw %} + GROUP BY + submission_date, + device_manufacturer +) + SELECT submission_date, first_seen_date, @@ -14,8 +34,14 @@ SELECT COUNTIF(is_dau) AS dau, 
COUNTIF(is_wau) AS wau, COUNTIF(is_mau) AS mau, + device_type, + -- Bucket device manufacturers ranked outside the top 150 for the submission_date into "other" prior to aggregation. A NULL device_manufacturer never matches the USING join (NULL = NULL is not TRUE), so its rank is NULL; keep it as NULL instead of mislabelling it as "other". + IF(device_manufacturer IS NULL OR manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer, FROM `{{ project_id }}.{{ dataset }}.engagement_clients` +LEFT JOIN + device_manufacturer_counts + USING(submission_date, device_manufacturer) WHERE {% raw %} {% if is_init() %} @@ -32,6 +58,8 @@ GROUP BY app_version, country, locale, + device_type, + device_manufacturer, is_mobile {% for field in product_attribution_fields.values() if not field.client_only %} {% if loop.first %},{% endif %} diff --git a/sql_generators/mobile_kpi_support_metrics/templates/engagement.schema.yaml b/sql_generators/mobile_kpi_support_metrics/templates/engagement.schema.yaml index c584a15d0c4..89606ebc252 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/engagement.schema.yaml +++ b/sql_generators/mobile_kpi_support_metrics/templates/engagement.schema.yaml @@ -58,3 +58,15 @@ fields: type: INTEGER mode: NULLABLE description: MAU - Monthly Active Users + +- name: device_type + type: STRING + mode: NULLABLE + description: | + On Apple devices allows us to differentiate between iPhone and iPad. On Android devices the value is always "Android". + +- name: device_manufacturer + type: STRING + mode: NULLABLE + description: | + Manufacturer of the device where the client is installed (lower-cased). Manufacturers ranked outside the top 150 by client count for the submission_date are bucketed as "other". 
diff --git a/sql_generators/mobile_kpi_support_metrics/templates/engagement_clients.view.sql b/sql_generators/mobile_kpi_support_metrics/templates/engagement_clients.view.sql index f1910a7087b..5dd40e90a3a 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/engagement_clients.view.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/engagement_clients.view.sql @@ -18,6 +18,8 @@ WITH active_users AS ( is_wau, is_mau, is_mobile, + device_type, + device_manufacturer, FROM `{{ project_id }}.{{ dataset }}.active_users` ), @@ -62,6 +64,8 @@ SELECT THEN 'existing_user' ELSE 'Unknown' END AS lifecycle_stage, + device_type, + device_manufacturer, FROM active_users LEFT JOIN diff --git a/sql_generators/mobile_kpi_support_metrics/templates/new_profile_clients.view.sql b/sql_generators/mobile_kpi_support_metrics/templates/new_profile_clients.view.sql index d8c1d114a73..9d49e965164 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/new_profile_clients.view.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/new_profile_clients.view.sql @@ -20,6 +20,7 @@ SELECT attribution.{{ attribution_field }}, {% endfor %} attribution.paid_vs_organic, + device_type, FROM `{{ project_id }}.{{ dataset }}.active_users` AS active_users LEFT JOIN diff --git a/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.query.sql b/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.query.sql index eba947447da..6f6c6382af0 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.query.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.query.sql @@ -1,4 +1,24 @@ {{ header }} +WITH device_manufacturer_counts AS ( + SELECT + first_seen_date, + device_manufacturer, + RANK() OVER(PARTITION BY first_seen_date ORDER BY COUNT(*) DESC) AS manufacturer_rank, + FROM + `{{ project_id }}.{{ dataset }}.new_profile_clients` + WHERE + {% raw %} + {% if is_init() %} + first_seen_date < CURRENT_DATE + {% else 
%} + first_seen_date = @submission_date + {% endif %} + {% endraw %} + GROUP BY + first_seen_date, + device_manufacturer +) + SELECT first_seen_date, normalized_channel, @@ -8,14 +28,19 @@ SELECT locale, os, os_version, - device_manufacturer, + -- Bucket device manufacturers ranked outside the top 150 for the first_seen_date into "other" prior to aggregation. A NULL device_manufacturer never matches the USING join (NULL = NULL is not TRUE), so its rank is NULL; keep it as NULL instead of mislabelling it as "other". + IF(device_manufacturer IS NULL OR manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer, is_mobile, {% for field in product_attribution_fields.values() if not field.client_only %} {{ field.name }}, {% endfor %} COUNT(*) AS new_profiles, + device_type, FROM `{{ project_id }}.{{ dataset }}.new_profile_clients` +LEFT JOIN + device_manufacturer_counts + USING(first_seen_date, device_manufacturer) WHERE {% raw %} {% if is_init() %} @@ -33,6 +58,7 @@ GROUP BY locale, os, os_version, + device_type, device_manufacturer, is_mobile {% for field in product_attribution_fields.values() if not field.client_only %} diff --git a/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.schema.yaml b/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.schema.yaml index baa8602e3a1..a1410b4bcf4 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.schema.yaml +++ b/sql_generators/mobile_kpi_support_metrics/templates/new_profiles.schema.yaml @@ -58,3 +58,9 @@ fields: type: INTEGER mode: NULLABLE description: Number of new profiles recorded for the first seen date. + +- name: device_type + type: STRING + mode: NULLABLE + description: | + On Apple devices allows us to differentiate between iPhone and iPad. On Android devices the value is always "Android". 
diff --git a/sql_generators/mobile_kpi_support_metrics/templates/retention.query.sql b/sql_generators/mobile_kpi_support_metrics/templates/retention.query.sql index 48a8cbeae35..56d8e14a68e 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/retention.query.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/retention.query.sql @@ -1,4 +1,26 @@ {{ header }} +WITH device_manufacturer_counts AS ( + SELECT + submission_date, + device_manufacturer, + RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank, + FROM + `{{ project_id }}.{{ dataset }}.retention_clients` + WHERE + {% raw %} + {% if is_init() %} + metric_date < DATE_SUB(CURRENT_DATE, INTERVAL 27 DAY) + AND submission_date < CURRENT_DATE + {% else %} + metric_date = DATE_SUB(@submission_date, INTERVAL 27 DAY) + AND submission_date = @submission_date + {% endif %} + {% endraw %} + GROUP BY + submission_date, + device_manufacturer +) + SELECT metric_date, first_seen_date, @@ -18,8 +40,14 @@ SELECT COUNTIF(retained_week_4_new_profile) AS retained_week_4_new_profiles, COUNTIF(new_profile_metric_date) AS new_profiles_metric_date, COUNTIF(repeat_profile) AS repeat_profiles, + device_type, + -- Bucket device manufacturers ranked outside the top 150 for the submission_date into "other" prior to aggregation. A NULL device_manufacturer never matches the USING join (NULL = NULL is not TRUE), so its rank is NULL; keep it as NULL instead of mislabelling it as "other". 
+ IF(device_manufacturer IS NULL OR manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer, FROM `{{ project_id }}.{{ dataset }}.retention_clients` +LEFT JOIN + device_manufacturer_counts + USING(submission_date, device_manufacturer) WHERE {% raw %} {% if is_init() %} @@ -38,6 +66,8 @@ GROUP BY country, app_version, locale, + device_type, + device_manufacturer, is_mobile {% for field in product_attribution_fields.values() if not field.client_only %} {% if loop.first %},{% endif %} diff --git a/sql_generators/mobile_kpi_support_metrics/templates/retention.schema.yaml b/sql_generators/mobile_kpi_support_metrics/templates/retention.schema.yaml index 8da7a8d750d..48aea5f8f38 100644 --- 
a/sql_generators/mobile_kpi_support_metrics/templates/retention.schema.yaml +++ b/sql_generators/mobile_kpi_support_metrics/templates/retention.schema.yaml @@ -78,3 +78,15 @@ fields: type: INTEGER mode: NULLABLE description: Number of new profiles on the metric date that were DAU at least twice in the next 28 days. + +- name: device_type + type: STRING + mode: NULLABLE + description: | + On Apple devices allows us to differentiate between iPhone and iPad. On Android devices the value is always "Android". + +- name: device_manufacturer + type: STRING + mode: NULLABLE + description: | + Manufacturer of the device where the client is installed (lower-cased). Manufacturers ranked outside the top 150 by client count for the submission_date are bucketed as "other". diff --git a/sql_generators/mobile_kpi_support_metrics/templates/retention_clients.view.sql b/sql_generators/mobile_kpi_support_metrics/templates/retention_clients.view.sql index 894da2da481..221c3fcf0c3 100644 --- a/sql_generators/mobile_kpi_support_metrics/templates/retention_clients.view.sql +++ b/sql_generators/mobile_kpi_support_metrics/templates/retention_clients.view.sql @@ -14,6 +14,8 @@ WITH active_users AS ( days_seen_bits, days_active_bits, is_mobile, + device_type, + device_manufacturer, FROM `{{ project_id }}.{{ dataset }}.active_users` ), @@ -81,6 +83,8 @@ SELECT THEN 'existing_user' ELSE 'Unknown' END AS lifecycle_stage, + active_users.device_type, + active_users.device_manufacturer, FROM `{{ project_id }}.{{ dataset }}.baseline_clients_daily` AS clients_daily INNER JOIN