Parse course_key once in the base model; roll instance facts up by day

What this patch does:
  * xapi_events_all_parsed: exposes a new course_key column (the last
    '/'-separated segment of course_id) and wraps verb_id, course_id,
    course_key and org in toLowCardinality().
  * completion / enrollment / forum / grading / navigation / problem / video
    event models: select the base model's course_key instead of re-splitting
    course_id in every view; enrollment_mode and interaction_type are wrapped
    in toLowCardinality().
  * fact_instance_actors / fact_instance_events / fact_instance_enrollments:
    bucket by day (emission_day) instead of by hour (emission_hour).
  * fact_instance_enrollments: drops the join to course_names, emits
    course_key instead of course_name, and lists every grouping column in
    order_by.
  * video_transcript_events: single-element "in (...)" becomes "=".

Review fixes folded in:
  * models/instance/schema.yml: events_cnt / actors_cnt descriptions said
    "in the given hour"; they now say "in the given day".
  * models/instance/schema.yml: removed the course_name column entry, which
    fact_instance_enrollments no longer selects.
  * fact_instance_enrollments.sql: explicit "as course_key" alias, matching
    the sibling columns and the names used in order_by / group by.

NOTE(review): the three schema.yml hunks are merged into one so the edited
description lines keep trailing context. This assumes old lines 23 and 33 of
models/instance/schema.yml are blank separators between model entries --
confirm before applying. The post-image hashes on the "index" lines of the two
edited files are stale. Indentation was reconstructed (4-space SQL, 2-space
YAML); use --ignore-whitespace if it does not match the tree.

diff --git a/models/base/schema.yml b/models/base/schema.yml
index ba0d9108..9eb0afb3 100644
--- a/models/base/schema.yml
+++ b/models/base/schema.yml
@@ -18,7 +18,10 @@ models:
         description: "The xAPI object identifier"
       - name: course_id
         data_type: string
-        description: "The course identifier"
+        description: "The fully-qualified course identifier URL"
+      - name: course_key
+        data_type: String
+        description: "The course key for the course"
       - name: org
         data_type: string
         description: "The organization that the course belongs to"
diff --git a/models/base/xapi_events_all_parsed.sql b/models/base/xapi_events_all_parsed.sql
index 40ac8ea2..bbfea94e 100644
--- a/models/base/xapi_events_all_parsed.sql
+++ b/models/base/xapi_events_all_parsed.sql
@@ -13,7 +13,7 @@
 
 select
     event_id as event_id,
-    JSON_VALUE(event::String, '$.verb.id') as verb_id,
+    toLowCardinality(JSON_VALUE(event::String, '$.verb.id')) as verb_id,
     COALESCE(
         NULLIF(JSON_VALUE(event::String, '$.actor.account.name'), ''),
         NULLIF(JSON_VALUE(event::String, '$.actor.mbox'), ''),
@@ -23,23 +23,32 @@ select
     -- If the contextActivities parent is a course, use that. It can be a "course"
     -- type, or a "cmi.interaction" type for multiple question problem submissions.
     -- Otherwise use the object id for the course id.
-    multiIf(
-        -- If the contextActivities parent is a course, use that
-        JSON_VALUE(
-            event::String, '$.context.contextActivities.parent[0].definition.type'
+    toLowCardinality(
+        multiIf(
+            -- If the contextActivities parent is a course, use that
+            JSON_VALUE(
+                event::String, '$.context.contextActivities.parent[0].definition.type'
+            )
+            = 'http://adlnet.gov/expapi/activities/course',
+            JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
+            -- Else if the contextActivities parent is a GroupActivity, it's a multi
+            -- question problem and we use the grouping id
+            JSON_VALUE(
+                event::String, '$.context.contextActivities.parent[0].objectType'
+            )
+            in ('Activity', 'GroupActivity'),
+            JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
+            -- Otherwise use the object id
+            JSON_VALUE(event::String, '$.object.id')
         )
-        = 'http://adlnet.gov/expapi/activities/course',
-        JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
-        -- Else if the contextActivities parent is a GroupActivity, it's a multi
-        -- question problem and we use the grouping id
-        JSON_VALUE(event::String, '$.context.contextActivities.parent[0].objectType')
-        in ('Activity', 'GroupActivity'),
-        JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-        -- Otherwise use the object id
-        JSON_VALUE(event::String, '$.object.id')
     ) as course_id,
-    coalesce(
-        get_org_from_course_url(course_id), get_org_from_ccx_course_url(course_id), ''
+    toLowCardinality(splitByString('/', course_id)[-1]) as course_key,
+    toLowCardinality(
+        coalesce(
+            get_org_from_course_url(course_id),
+            get_org_from_ccx_course_url(course_id),
+            ''
+        )
     ) as org,
     emission_time as emission_time,
     event::String as event
diff --git a/models/completion/completion_events.sql b/models/completion/completion_events.sql
index b8315667..a962af24 100644
--- a/models/completion/completion_events.sql
+++ b/models/completion/completion_events.sql
@@ -15,7 +15,7 @@ select
     CAST(emission_time, 'DateTime') as emission_time,
     actor_id,
     object_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
     JSON_VALUE(
diff --git a/models/enrollment/enrollment_events.sql b/models/enrollment/enrollment_events.sql
index 90ed12db..f0447df7 100644
--- a/models/enrollment/enrollment_events.sql
+++ b/models/enrollment/enrollment_events.sql
@@ -15,12 +15,14 @@ select
     cast(emission_time as DateTime) as emission_time,
     actor_id,
     object_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
-    JSON_VALUE(
-        event,
-        '$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
+    toLowCardinality(
+        JSON_VALUE(
+            event,
+            '$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
+        )
     ) as enrollment_mode
 from {{ ref("xapi_events_all_parsed") }}
 where
diff --git a/models/forum/forum_events.sql b/models/forum/forum_events.sql
index c643dd93..32ee108f 100644
--- a/models/forum/forum_events.sql
+++ b/models/forum/forum_events.sql
@@ -14,7 +14,7 @@ select
     event_id,
     CAST(emission_time, 'DateTime') as emission_time,
     org,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     object_id,
     actor_id,
     verb_id
diff --git a/models/grading/grading_events.sql b/models/grading/grading_events.sql
index 89a36189..50dbfbc5 100644
--- a/models/grading/grading_events.sql
+++ b/models/grading/grading_events.sql
@@ -16,7 +16,7 @@ select
     CAST(emission_time, 'DateTime') as emission_time,
     actor_id,
     object_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
     JSONExtractFloat(event, 'result', 'score', 'scaled') as scaled_score
diff --git a/models/instance/fact_instance_actors.sql b/models/instance/fact_instance_actors.sql
index 252f8376..467093a0 100644
--- a/models/instance/fact_instance_actors.sql
+++ b/models/instance/fact_instance_actors.sql
@@ -3,13 +3,13 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("AggregatingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
+    date_trunc('day', emission_time) as emission_day,
     uniqCombinedState(actor_id) as actors_cnt
 from {{ ref("xapi_events_all_parsed") }}
-group by emission_hour
+group by emission_day
diff --git a/models/instance/fact_instance_enrollments.sql b/models/instance/fact_instance_enrollments.sql
index c0d182b9..49aa72ed 100644
--- a/models/instance/fact_instance_enrollments.sql
+++ b/models/instance/fact_instance_enrollments.sql
@@ -3,8 +3,8 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("SummingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day, course_key, enrollment_mode, enrollment_status)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
@@ -19,11 +19,10 @@ with
     )
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
-    courses.course_name as course_name,
+    date_trunc('day', emission_time) as emission_day,
+    enrollments.course_key as course_key,
     enrollments.enrollment_mode as enrollment_mode,
     enrollments.enrollment_status as enrollment_status,
     count() as course_enrollment_mode_status_cnt
 from enrollments
-join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key
-group by emission_hour, course_name, enrollment_mode, enrollment_status
+group by emission_day, course_key, enrollment_mode, enrollment_status
diff --git a/models/instance/fact_instance_events.sql b/models/instance/fact_instance_events.sql
index 901a6dfc..76953fb5 100644
--- a/models/instance/fact_instance_events.sql
+++ b/models/instance/fact_instance_events.sql
@@ -3,13 +3,13 @@
         materialized="materialized_view",
         schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
         engine=get_engine("AggregatingMergeTree()"),
-        order_by="(emission_hour)",
-        partition_by="(toYYYYMM(emission_hour))",
+        order_by="(emission_day)",
+        partition_by="(toYYYYMM(emission_day))",
     )
 }}
 
 select
-    date_trunc('hour', emission_time) as emission_hour,
+    date_trunc('day', emission_time) as emission_day,
     uniqCombinedState(event_id) as events_cnt
 from {{ ref("xapi_events_all_parsed") }}
-group by emission_hour
+group by emission_day
diff --git a/models/instance/schema.yml b/models/instance/schema.yml
index 475f84c5..dd1298f7 100644
--- a/models/instance/schema.yml
+++ b/models/instance/schema.yml
@@ -14,32 +14,32 @@ models:
   - name: fact_instance_events
     description: "A materialized view summarizing site-wide xAPI event activity"
     columns:
-      - name: emission_hour
-        data_type: datetime(64)
-        description: "Time of summary, rounded to the nearest hour"
+      - name: emission_day
+        data_type: datetime
+        description: "Time of summary, truncated to the day"
       - name: events_cnt
         data_type: int
-        description: "The number of xAPI events that occurred in the given hour"
+        description: "The number of xAPI events that occurred in the given day"
 
   - name: fact_instance_actors
     description: "A materialized view summarizing site-wide user activity"
     columns:
-      - name: emission_hour
-        data_type: datetime(64)
-        description: "Time of summary, rounded to the nearest hour"
+      - name: emission_day
+        data_type: datetime
+        description: "Time of summary, truncated to the day"
       - name: actors_cnt
         data_type: int
-        description: "The number of xAPI actors active in the given hour"
+        description: "The number of xAPI actors active in the given day"
 
   - name: fact_instance_enrollments
     description: "A materialized view for summarizing site-wide enrollment activity"
     columns:
-      - name: emission_hour
-        data_type: datetime(64)
-        description: "Time of summary, rounded to the nearest hour"
-      - name: course_name
-        data_type: String
-        description: "The name of the course"
+      - name: emission_day
+        data_type: datetime
+        description: "Time of summary, truncated to the day"
+      - name: course_key
+        data_type: String
+        description: "The course key for the course"
       - name: enrollment_mode
         data_type: string
         description: "The name of the enrollment mode (ex: audit, honor)"
diff --git a/models/navigation/navigation_events.sql b/models/navigation/navigation_events.sql
index 65fa5db3..d09a419d 100644
--- a/models/navigation/navigation_events.sql
+++ b/models/navigation/navigation_events.sql
@@ -15,7 +15,7 @@ select
     cast(emission_time as DateTime) as emission_time,
     actor_id,
     splitByString('/xblock/', object_id)[-1] as block_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
     JSONExtractString(event, 'object', 'definition', 'type') as object_type,
diff --git a/models/problems/problem_events.sql b/models/problems/problem_events.sql
index e75ccea2..11f29c13 100644
--- a/models/problems/problem_events.sql
+++ b/models/problems/problem_events.sql
@@ -15,7 +15,7 @@ select
     cast(emission_time as DateTime) as emission_time,
     actor_id,
     object_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
     JSON_VALUE(event, '$.result.response') as responses,
@@ -25,7 +25,9 @@ select
         cast(JSON_VALUE(event, '$.result.success') as Bool),
         false
     ) as success,
-    JSON_VALUE(event, '$.object.definition.interactionType') as interaction_type,
+    toLowCardinality(
+        JSON_VALUE(event, '$.object.definition.interactionType')
+    ) as interaction_type,
     if(
         verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated',
         cast(
diff --git a/models/video/video_playback_events.sql b/models/video/video_playback_events.sql
index a33aec47..d515146a 100644
--- a/models/video/video_playback_events.sql
+++ b/models/video/video_playback_events.sql
@@ -15,7 +15,7 @@ select
     CAST(emission_time, 'DateTime') as emission_time,
     actor_id,
     object_id,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     org,
     verb_id,
     ceil(
diff --git a/models/video/video_transcript_events.sql b/models/video/video_transcript_events.sql
index 23386c7a..bcf5df6f 100644
--- a/models/video/video_transcript_events.sql
+++ b/models/video/video_transcript_events.sql
@@ -14,7 +14,7 @@ select
     event_id,
     CAST(emission_time, 'DateTime') as emission_time,
     org,
-    splitByString('/', course_id)[-1] as course_key,
+    course_key,
     splitByString('/xblock/', object_id)[2] as video_id,
     actor_id,
     JSONExtractBool(
@@ -25,7 +25,7 @@ select
     ) as cc_enabled
 from {{ ref("xapi_events_all_parsed") }}
 where
-    verb_id in ('http://adlnet.gov/expapi/verbs/interacted')
+    verb_id = 'http://adlnet.gov/expapi/verbs/interacted'
     and JSONHas(
         event,
         'result',