Skip to content

Commit b431753

Browse files
authored
Merge pull request #1 from ndrewwm/pace-of-discovery
Streamline project DAG, add report tracking counts of "new" artists/tracks
2 parents 839be1c + a008874 commit b431753

18 files changed

+367
-90
lines changed

dbt_spotify/dbt_project.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: 'dbt_spotify'
2-
version: '1.0.0'
2+
version: '2.0.0'
33

44
profile: 'dbt_spotify'
55

@@ -18,7 +18,12 @@ models:
1818
dbt_spotify:
1919
staging:
2020
+materialized: view
21-
intermediate:
22-
+materialized: ephemeral
21+
+docs:
22+
node_color: 'silver'
2323
marts:
2424
+materialized: table
25+
+docs:
26+
node_color: salmon
27+
reports:
28+
+docs:
29+
node_color: skyblue

dbt_spotify/models/intermediate/int_dim_track.sql

Lines changed: 0 additions & 7 deletions
This file was deleted.

dbt_spotify/models/intermediate/int_fct_played_track.sql

Lines changed: 0 additions & 33 deletions
This file was deleted.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{% docs dttm_first_played %}
2+
Date and time (America/Boise) when the track or artist was first recorded as being played.
3+
{% enddocs %}
4+
5+
{% docs yr_first_played %}
6+
Year when the track or artist was first recorded as being played.
7+
{% enddocs %}
8+
9+
{% docs wk_first_played %}
10+
Week of the year (integer) when the track or artist was first recorded as being played.
11+
{% enddocs %}
12+
13+
{% docs total_plays %}
14+
Total number of times the track or artist has been recorded as being played.
15+
{% enddocs %}

dbt_spotify/models/marts/_spotify__models.yml

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,34 @@ models:
1111
- not_null
1212

1313
- name: album
14-
description: Album title.
14+
description: '{{ doc("track_album") }}'
1515

1616
- name: release_date
17-
description: Date when the album was released.
17+
description: '{{ doc("album_release_date") }}'
1818

1919
- name: dim_artist
20-
description: Directory of artists observed in the data. Each row is an artist or performing group.
20+
description: Directory of artists observed in the data. Each row is an artist or performing group (or set of artists).
2121
columns:
2222
- name: artist_id
2323
description: Unique artist ID.
2424
tests:
2525
- unique
2626
- not_null
2727

28-
- name: artist
29-
description: Artist/performer name.
28+
- name: artists
29+
description: '{{ doc("track_artists") }}'
30+
31+
- name: total_plays
32+
description: '{{ doc("total_plays") }}'
33+
34+
- name: dttm_first_played
35+
description: '{{ doc("dttm_first_played") }}'
36+
37+
- name: yr_first_played
38+
description: '{{ doc("yr_first_played") }}'
39+
40+
- name: wk_first_played
41+
description: '{{ doc("wk_first_played") }}'
3042

3143
- name: dim_track
3244
description: Directory of all tracks observed. Each row is a track.
@@ -43,11 +55,26 @@ models:
4355
- name: album_id
4456
description: Album ID corresponding to this track.
4557

58+
- name: album
59+
description: '{{ doc("track_album") }}'
60+
4661
- name: artists
4762
description: '{{ doc("track_artists") }}'
4863

4964
- name: duration_ms
50-
description: Track duration (milliseconds).
65+
description: '{{ doc("duration_ms") }}'
66+
67+
- name: total_plays
68+
description: '{{ doc("total_plays") }}'
69+
70+
- name: dttm_first_played
71+
description: '{{ doc("dttm_first_played") }}'
72+
73+
- name: yr_first_played
74+
description: '{{ doc("yr_first_played") }}'
75+
76+
- name: wk_first_played
77+
description: '{{ doc("wk_first_played") }}'
5178

5279
- name: fct_played_track
5380
description: Spotify listening history. Each row is an instance that a track was played.
@@ -67,8 +94,11 @@ models:
6794
- name: played_at
6895
description: '{{ doc("played_at") }}'
6996

97+
- name: played_at_mtn
98+
description: '{{ doc("played_at_mtn") }}'
99+
70100
- name: track_popularity
71101
description: '{{ doc("popularity") }}'
72102

73103
- name: context
74-
description: What context the track is being played in.
104+
description: '{{ doc("context") }}'
Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,10 @@
1-
select * from {{ ref("stg_dim_album") }}
1+
-- Album dimension: one row per distinct (album_id, album title, release date)
-- combination found in the staged listening history.
with recent_tracks as (

    select *
    from {{ ref("stg_recent_tracks") }}

)

-- Grouping by every output column collapses duplicate combinations.
select
    album_id,
    track_album as album,
    album_release_date as release_date
from recent_tracks
group by
    album_id,
    track_album,
    album_release_date
order by album_id
Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,24 @@
1-
select * from {{ ref("stg_dim_artist") }}
1+
-- Artist dimension built from the staged listening history. Each distinct
-- (artist_id, artists) pair is emitted together with per-artist play statistics.
with plays as (

    select *
    from {{ ref("stg_recent_tracks") }}

),

-- Per-artist aggregates: number of plays, the earliest played_at_mtn value,
-- and the year / week derived from that earliest value.
artist_stats as (

    select
        artist_id,
        count(*) as total_plays,
        min(played_at_mtn) as dttm_first_played,
        year(min(played_at_mtn)::date) as yr_first_played,
        week(min(played_at_mtn)::date) as wk_first_played
    from plays
    group by artist_id

)

-- Attach the aggregates back onto the play rows, then de-duplicate.
select distinct
    plays.artist_id,
    plays.track_artists as artists,
    artist_stats.total_plays,
    artist_stats.dttm_first_played,
    artist_stats.yr_first_played,
    artist_stats.wk_first_played
from plays
left join artist_stats
    on plays.artist_id = artist_stats.artist_id
order by plays.artist_id
Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,28 @@
1-
select * from {{ ref("int_dim_track") }}
1+
-- Track dimension built from the staged listening history. Each distinct
-- combination of track attributes is emitted together with per-track play
-- statistics.
with plays as (

    select *
    from {{ ref("stg_recent_tracks") }}

),

-- Per-track aggregates: number of plays, the earliest played_at_mtn value,
-- and the year / week derived from that earliest value.
track_stats as (

    select
        track_id,
        count(*) as total_plays,
        min(played_at_mtn) as dttm_first_played,
        year(min(played_at_mtn)::date) as yr_first_played,
        week(min(played_at_mtn)::date) as wk_first_played
    from plays
    group by track_id

)

-- Attach the aggregates back onto the play rows, then de-duplicate.
select distinct
    plays.track_id,
    plays.album_id,
    plays.track_album as album,
    plays.track_name,
    plays.track_artists as artists,
    track_stats.total_plays,
    track_stats.dttm_first_played,
    track_stats.yr_first_played,
    track_stats.wk_first_played,
    plays.duration_ms
from plays
left join track_stats
    on plays.track_id = track_stats.track_id
order by plays.track_id
Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,18 @@
1-
select * from {{ ref('int_fct_played_track') }}
1+
-- Listening-history fact table: one output row per row of the staged plays,
-- restricted to the play-level columns.
with tracks_played as (

    select *
    from {{ ref("stg_recent_tracks") }}

)

select
    play_id,
    track_id,
    album_id,
    played_at,
    played_at_mtn,
    track_popularity,
    context
from tracks_played
-- Sort in the outermost query: an ORDER BY placed inside a CTE is not
-- guaranteed to carry through to the final result set.
order by played_at_mtn desc

dbt_spotify/models/marts/reports/_spotify_reports__models.yml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,65 @@ version: 2
33
models:
44
- name: rpt_artist_counts
55
description: A summary of the total number of plays and minutes listened over the prior 30 days, by artist.
6+
columns:
7+
- name: artists
8+
description: '{{ doc("track_artists") }}'
9+
10+
- name: plays
11+
description: Number of times a track from the artist(s) has been played in the past 30 days.
12+
13+
- name: minutes_played
14+
description: Total number of minutes spent listening to the artist(s) over the past 30 days.
615

716
- name: rpt_track_counts
817
description: A summary of the total number of plays and minutes listened over the prior 30 days, by track.
18+
columns:
19+
- name: track_name
20+
description: The track's name.
21+
22+
- name: artists
23+
description: '{{ doc("track_artists") }}'
24+
25+
- name: album
26+
description: '{{ doc("track_album") }}'
27+
28+
- name: plays
29+
description: Number of times this track has been played in the past 30 days.
30+
31+
- name: minutes_played
32+
description: Total number of minutes spent listening to this track over the past 30 days.
33+
34+
- name: rpt_discovery_rate
35+
description: |
36+
A report, organized by week and year, tracking the total number of plays per week,
37+
how many *unique* artists/tracks were played, and how many *new* artists/tracks were played.
38+
columns:
39+
- name: year_played
40+
description: Calendar year in which the plays were recorded.
41+
42+
- name: week_played
43+
description: Week of the year (integer) in which the plays were recorded.
44+
45+
- name: week_dt
46+
description: Latest date within the week on which a play was recorded. Included for plotting convenience.
47+
48+
- name: plays
49+
description: Total number of plays recorded during this week.
50+
51+
- name: uniq_artists
52+
description: Count of unique artists observed.
53+
54+
- name: uniq_tracks
55+
description: Count of unique tracks observed.
56+
57+
- name: new_tracks
58+
description: Count of new tracks observed.
59+
60+
- name: new_artists
61+
description: Count of new artists observed.
62+
63+
- name: track_discovery_rate
64+
description: "WIP. A proportion: new_tracks / uniq_tracks"
65+
66+
- name: artist_discovery_rate
67+
description: "WIP. A proportion: new_artists / uniq_artists"
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- Weekly discovery report. For each (year, week) with recorded plays it returns
-- the number of plays, the number of distinct artists and tracks played, the
-- number of tracks and artists whose first recorded play falls in that week,
-- and the resulting discovery proportions.
with tracks as (

    select *
    from {{ ref("fct_played_track") }}

),

dim_track as (

    select *
    from {{ ref("dim_track") }}

),

dim_artist as (

    select *
    from {{ ref("dim_artist") }}

),

-- Weekly activity: total plays plus distinct artists / tracks played.
-- week_dt is the latest play date observed within the week.
-- NOTE(review): if week() returns ISO week numbers while year() returns the
-- calendar year, plays near a year boundary may be bucketed inconsistently.
-- Confirm against the target engine; dim_track and dim_artist derive
-- yr_first_played / wk_first_played with the same function pair, so any change
-- must be made in all three models together.
base as (

    select
        year(tracks.played_at_mtn) as year_played,
        week(tracks.played_at_mtn) as week_played,
        max(tracks.played_at_mtn::date) as week_dt,
        count(*) as plays,
        count(distinct dt.artists) as uniq_artists,
        count(distinct dt.track_id) as uniq_tracks
    from tracks
    left join dim_track dt
        on tracks.track_id = dt.track_id
    group by
        year(tracks.played_at_mtn),
        week(tracks.played_at_mtn)

),

-- Number of tracks whose first recorded play falls in each (year, week).
new_tracks as (

    select
        yr_first_played as year_played,
        wk_first_played as week_played,
        count(*) as new_tracks
    from dim_track
    group by yr_first_played, wk_first_played

),

-- Number of dim_artist rows whose first recorded play falls in each (year, week).
new_artists as (

    select
        yr_first_played as year_played,
        wk_first_played as week_played,
        count(*) as new_artists
    from dim_artist
    group by yr_first_played, wk_first_played

),

final as (

    select
        base.year_played,
        base.week_played,
        base.week_dt,
        base.plays,
        base.uniq_artists,
        base.uniq_tracks,
        -- A week with no first-time tracks/artists has no matching row in the
        -- left-joined CTEs; report that as a count of 0 rather than NULL.
        coalesce(new_tracks.new_tracks, 0) as new_tracks,
        coalesce(new_artists.new_artists, 0) as new_artists,
        {{
            dbt_utils.safe_divide('coalesce(new_tracks.new_tracks, 0)', 'base.uniq_tracks')
        }} as track_discovery_rate,
        {{
            dbt_utils.safe_divide('coalesce(new_artists.new_artists, 0)', 'base.uniq_artists')
        }} as artist_discovery_rate
    from base
    left join new_tracks
        on
            base.year_played = new_tracks.year_played
            and base.week_played = new_tracks.week_played
    left join new_artists
        on
            base.year_played = new_artists.year_played
            and base.week_played = new_artists.week_played

)

select *
from final
-- Sort in the outermost query: an ORDER BY placed inside a CTE is not
-- guaranteed to carry through to the final result set.
order by year_played, week_played

0 commit comments

Comments
 (0)