From 33530af898d78748e2bbc915a19278a62ff4a4d1 Mon Sep 17 00:00:00 2001
From: Brian Mesick
Date: Wed, 29 May 2024 14:01:22 -0400
Subject: [PATCH] fix: Make instance aggregations daily instead of hourly

These currently get bogged down on large datasets, when querying across
all time. This is an attempt to speed things up.
---
Review note: the sorting key of fact_instance_enrollments is spelled
`enrollment_status` so that it matches the column alias produced by the
model's SELECT and used in its GROUP BY.

 models/instance/fact_instance_actors.sql      |  8 ++++----
 models/instance/fact_instance_enrollments.sql | 11 +++++------
 models/instance/fact_instance_events.sql      |  8 ++++----
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/models/instance/fact_instance_actors.sql b/models/instance/fact_instance_actors.sql
index 252f8376..467093a0 100644
--- a/models/instance/fact_instance_actors.sql
+++ b/models/instance/fact_instance_actors.sql
@@ -3,13 +3,13 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("AggregatingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
+    date_trunc('day', emission_time) as emission_day,
     uniqCombinedState(actor_id) as actors_cnt
 from {{ ref("xapi_events_all_parsed") }}
-group by emission_hour
+group by emission_day
diff --git a/models/instance/fact_instance_enrollments.sql b/models/instance/fact_instance_enrollments.sql
index c0d182b9..1a68e71b 100644
--- a/models/instance/fact_instance_enrollments.sql
+++ b/models/instance/fact_instance_enrollments.sql
@@ -3,8 +3,8 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("SummingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day, course_key, enrollment_mode, enrollment_status)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
@@ -19,11 +19,10 @@ with
     )
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
-    courses.course_name as course_name,
+    date_trunc('day', emission_time) as emission_day,
+    enrollments.course_key,
     enrollments.enrollment_mode as enrollment_mode,
     enrollments.enrollment_status as enrollment_status,
     count() as course_enrollment_mode_status_cnt
 from enrollments
-join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key
-group by emission_hour, course_name, enrollment_mode, enrollment_status
+group by emission_day, course_key, enrollment_mode, enrollment_status
diff --git a/models/instance/fact_instance_events.sql b/models/instance/fact_instance_events.sql
index 901a6dfc..76953fb5 100644
--- a/models/instance/fact_instance_events.sql
+++ b/models/instance/fact_instance_events.sql
@@ -3,13 +3,13 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("AggregatingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
+    date_trunc('day', emission_time) as emission_day,
     uniqCombinedState(event_id) as events_cnt
 from {{ ref("xapi_events_all_parsed") }}
-group by emission_hour
+group by emission_day