Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Use LowCardinality for smaller columns #98

Merged
merged 5 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion models/base/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ models:
description: "The xAPI object identifier"
- name: course_id
data_type: string
description: "The course identifier"
description: "The fully-qualified course identifier URL"
- name: course_key
data_type: String
description: "The course key for the course"
- name: org
data_type: string
description: "The organization that the course belongs to"
Expand Down
41 changes: 25 additions & 16 deletions models/base/xapi_events_all_parsed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

select
event_id as event_id,
JSON_VALUE(event::String, '$.verb.id') as verb_id,
toLowCardinality(JSON_VALUE(event::String, '$.verb.id')) as verb_id,
COALESCE(
NULLIF(JSON_VALUE(event::String, '$.actor.account.name'), ''),
NULLIF(JSON_VALUE(event::String, '$.actor.mbox'), ''),
Expand All @@ -23,23 +23,32 @@ select
-- If the contextActivities parent is a course, use that. It can be a "course"
-- type, or a "cmi.interaction" type for multiple question problem submissions.
-- Otherwise use the object id for the course id.
multiIf(
-- If the contextActivities parent is a course, use that
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].definition.type'
toLowCardinality(
multiIf(
-- If the contextActivities parent is a course, use that
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].definition.type'
)
= 'http://adlnet.gov/expapi/activities/course',
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
-- Else if the contextActivities parent is a GroupActivity, it's a multi
-- question problem and we use the grouping id
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].objectType'
)
in ('Activity', 'GroupActivity'),
JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-- Otherwise use the object id
JSON_VALUE(event::String, '$.object.id')
)
= 'http://adlnet.gov/expapi/activities/course',
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].id'),
-- Else if the contextActivities parent is a GroupActivity, it's a multi
-- question problem and we use the grouping id
JSON_VALUE(event::String, '$.context.contextActivities.parent[0].objectType')
in ('Activity', 'GroupActivity'),
JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-- Otherwise use the object id
JSON_VALUE(event::String, '$.object.id')
) as course_id,
coalesce(
get_org_from_course_url(course_id), get_org_from_ccx_course_url(course_id), ''
toLowCardinality(splitByString('/', course_id)[-1]) as course_key,
toLowCardinality(
coalesce(
get_org_from_course_url(course_id),
get_org_from_ccx_course_url(course_id),
''
)
) as org,
emission_time as emission_time,
event::String as event
Expand Down
2 changes: 1 addition & 1 deletion models/completion/completion_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
Expand Down
10 changes: 6 additions & 4 deletions models/enrollment/enrollment_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
event,
'$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
toLowCardinality(
JSON_VALUE(
event,
'$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
)
) as enrollment_mode
from {{ ref("xapi_events_all_parsed") }}
where
Expand Down
2 changes: 1 addition & 1 deletion models/forum/forum_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
object_id,
actor_id,
verb_id
Expand Down
2 changes: 1 addition & 1 deletion models/grading/grading_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractFloat(event, 'result', 'score', 'scaled') as scaled_score
Expand Down
8 changes: 4 additions & 4 deletions models/instance/fact_instance_actors.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("AggregatingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day)",
partition_by="(toYYYYMM(emission_day))",
)
}}

select
date_trunc('hour', emission_time) as emission_hour,
date_trunc('day', emission_time) as emission_day,
uniqCombinedState(actor_id) as actors_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
group by emission_day
11 changes: 5 additions & 6 deletions models/instance/fact_instance_enrollments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("SummingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day, course_key, enrollment_mode, enrollment_status)",
partition_by="(toYYYYMM(emission_day))",
)
}}

Expand All @@ -19,11 +19,10 @@ with
)

select
date_trunc('hour', emission_time) as emission_hour,
courses.course_name as course_name,
date_trunc('day', emission_time) as emission_day,
enrollments.course_key,
enrollments.enrollment_mode as enrollment_mode,
enrollments.enrollment_status as enrollment_status,
count() as course_enrollment_mode_status_cnt
from enrollments
join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key
group by emission_hour, course_name, enrollment_mode, enrollment_status
group by emission_day, course_key, enrollment_mode, enrollment_status
8 changes: 4 additions & 4 deletions models/instance/fact_instance_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("AggregatingMergeTree()"),
order_by="(emission_hour)",
partition_by="(toYYYYMM(emission_hour))",
order_by="(emission_day)",
partition_by="(toYYYYMM(emission_day))",
)
}}

select
date_trunc('hour', emission_time) as emission_hour,
date_trunc('day', emission_time) as emission_day,
uniqCombinedState(event_id) as events_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
group by emission_day
21 changes: 12 additions & 9 deletions models/instance/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,35 @@ models:
- name: fact_instance_events
description: "A materialized view summarizing site-wide xAPI event activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: events_cnt
data_type: int
description: "The number of xAPI events that occurred in the given day"

- name: fact_instance_actors
description: "A materialized view summarizing site-wide user activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: actors_cnt
data_type: int
description: "The number of xAPI actors active in the given day"

- name: fact_instance_enrollments
description: "A materialized view for summarizing site-wide enrollment activity"
columns:
- name: emission_hour
data_type: datetime(64)
description: "Time of summary, rounded to the nearest hour"
- name: emission_day
data_type: datetime
description: "Time of summary, truncated to the day"
- name: course_name
data_type: String
description: "The name of the course"
- name: course_key
data_type: String
description: "The course key for the course"
- name: enrollment_mode
data_type: string
description: "The name of the enrollment mode (ex: audit, honor)"
Expand Down
2 changes: 1 addition & 1 deletion models/navigation/navigation_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
splitByString('/xblock/', object_id)[-1] as block_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractString(event, 'object', 'definition', 'type') as object_type,
Expand Down
6 changes: 4 additions & 2 deletions models/problems/problem_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(event, '$.result.response') as responses,
Expand All @@ -25,7 +25,9 @@ select
cast(JSON_VALUE(event, '$.result.success') as Bool),
false
) as success,
JSON_VALUE(event, '$.object.definition.interactionType') as interaction_type,
toLowCardinality(
JSON_VALUE(event, '$.object.definition.interactionType')
) as interaction_type,
if(
verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated',
cast(
Expand Down
2 changes: 1 addition & 1 deletion models/video/video_playback_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
ceil(
Expand Down
4 changes: 2 additions & 2 deletions models/video/video_transcript_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
splitByString('/xblock/', object_id)[2] as video_id,
actor_id,
JSONExtractBool(
Expand All @@ -25,7 +25,7 @@ select
) as cc_enabled
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in ('http://adlnet.gov/expapi/verbs/interacted')
verb_id = 'http://adlnet.gov/expapi/verbs/interacted'
and JSONHas(
event,
'result',
Expand Down
Loading