-
Notifications
You must be signed in to change notification settings - Fork 4
/
22_affinity.view.lkml
197 lines (168 loc) · 5.29 KB
/
22_affinity.view.lkml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# Product-pair affinity scores.
#
# The derived table yields one row per ordered pair of distinct products
# (product_a_id, product_b_id) that at least one user has purchased both of.
# It is built from the user_order_product and total_order_product derived
# tables:
#   juf        - per pair, the number of DISTINCT users who bought both.
#                user_order_product holds one row per (user, product, order),
#                so its self-join emits one row per combination of orders;
#                COUNT(*) would count a repeat buyer several times.
#   jof        - per pair, the number of orders containing both products.
#                Rows of user_order_product are unique per (order, product),
#                so COUNT(*) is already a count of orders here.
#   top1/top2  - total purchase count of product a / product b.
#
# The self-joins are INNER JOINs: with a LEFT JOIN, a product bought by a user
# who bought nothing else would surface as a (product, NULL) pseudo-pair.
# Pairs that were never bought in the same order get joint_order_freq = 0
# (not NULL), so their order affinity is 0 and combined_affinity stays defined.
view: affinity {
  derived_table: {
    persist_for: "24 hours" ## Best practice would be to use `datagroup_trigger: ecommerce_etl` but we don't here for snowflake costs
    sql:
      SELECT
          juf.product_a_id
        , juf.product_b_id
        , juf.joint_user_freq
        , COALESCE(jof.joint_order_freq, 0) AS joint_order_freq
        , top1.prod_freq AS product_a_freq
        , top2.prod_freq AS product_b_freq
      FROM
        (
          SELECT
              up1.prod_id AS product_a_id
            , up2.prod_id AS product_b_id
            , COUNT(DISTINCT up1.user_id) AS joint_user_freq
          FROM ${user_order_product.SQL_TABLE_NAME} AS up1
          INNER JOIN ${user_order_product.SQL_TABLE_NAME} AS up2
            ON up1.user_id = up2.user_id
            AND up1.prod_id <> up2.prod_id
          GROUP BY up1.prod_id, up2.prod_id
        ) AS juf
      LEFT JOIN
        (
          SELECT
              op1.prod_id AS oproduct_a_id
            , op2.prod_id AS oproduct_b_id
            , COUNT(*) AS joint_order_freq
          FROM ${user_order_product.SQL_TABLE_NAME} op1
          INNER JOIN ${user_order_product.SQL_TABLE_NAME} op2
            ON op1.order_id = op2.order_id
            AND op1.prod_id <> op2.prod_id
          GROUP BY op1.prod_id, op2.prod_id
        ) AS jof
        ON jof.oproduct_a_id = juf.product_a_id
        AND jof.oproduct_b_id = juf.product_b_id
      LEFT JOIN ${total_order_product.SQL_TABLE_NAME} top1
        ON top1.prod_id = juf.product_a_id
      LEFT JOIN ${total_order_product.SQL_TABLE_NAME} top2
        ON top2.prod_id = juf.product_b_id
    ;;
  }

  # Number of product pairs (rows of the derived table).
  measure: count {
    type: count
    drill_fields: [detail*]
  }

  # Product ids are typed as numbers, matching user_order_product.prod_id,
  # the column they are selected from.
  dimension: product_a_id {
    type: number
    sql: ${TABLE}.product_a_id ;;
  }

  dimension: product_b_id {
    type: number
    sql: ${TABLE}.product_b_id ;;
  }

  dimension: joint_user_freq {
    description: "The number of users who have purchased both product a and product b"
    type: number
    sql: ${TABLE}.joint_user_freq ;;
  }

  dimension: joint_order_freq {
    description: "The number of orders that include both product a and product b"
    type: number
    sql: ${TABLE}.joint_order_freq ;;
  }

  dimension: product_a_freq {
    description: "The total number of times product a has been purchased"
    type: number
    sql: ${TABLE}.product_a_freq ;;
  }

  dimension: product_b_freq {
    description: "The total number of times product b has been purchased"
    type: number
    sql: ${TABLE}.product_b_freq ;;
  }

  # Row-level affinity ratios: joint / (a + b - joint).
  # 1.0* forces non-integer division; NULLIF turns a zero denominator into
  # NULL instead of a division-by-zero error. Hidden because they are exposed
  # through the averaged measures below.
  dimension: user_affinity {
    hidden: yes
    type: number
    sql: 1.0*${joint_user_freq}/NULLIF((${product_a_freq}+${product_b_freq})-(${joint_user_freq}),0) ;;
    value_format_name: percent_2
  }

  dimension: order_affinity {
    hidden: yes
    type: number
    sql: 1.0*${joint_order_freq}/NULLIF((${product_a_freq}+${product_b_freq})-(${joint_order_freq}),0) ;;
    value_format_name: percent_2
  }

  # The measures scale the ratios by 100 and show them as plain decimals.
  measure: avg_user_affinity {
    label: "Affinity Score (by User History)"
    description: "Percentage of users that bought both products weighted by how many times each product sold individually"
    type: average
    sql: 100.0 * ${user_affinity} ;;
    value_format_name: decimal_2
  }

  measure: avg_order_affinity {
    label: "Affinity Score (by Order Basket)"
    description: "Percentage of orders that contained both products weighted by how many times each product sold individually"
    type: average
    sql: 100.0 * ${order_affinity} ;;
    value_format_name: decimal_2
  }

  # Sum of the two averaged scores; formatted like its components.
  measure: combined_affinity {
    type: number
    sql: ${avg_user_affinity} + ${avg_order_affinity} ;;
    value_format_name: decimal_2
  }

  # Fields shown when drilling into the count measure.
  set: detail {
    fields: [product_a_id,product_b_id,user_affinity,order_affinity]
  }
}
#############################################
# Table that lists the distinct products purchased, by user id and order id
# One row per distinct (user_id, prod_id, order_id) combination found in
# order_items. The product id is resolved through inventory_items -> products;
# both lookups are LEFT JOINs, so an order item without a matching product
# row keeps its user/order ids and carries a NULL prod_id.
view: user_order_product {
  derived_table: {
    # Best practice would be `datagroup_trigger: ecommerce_etl`; a fixed
    # 24-hour persistence is used instead to limit Snowflake costs.
    persist_for: "24 hours"
    sql:
      SELECT DISTINCT
          oi.user_id  AS user_id
        , p.id        AS prod_id
        , oi.order_id AS order_id
      FROM order_items oi
      LEFT JOIN inventory_items ii
        ON ii.id = oi.inventory_item_id
      LEFT JOIN products p
        ON p.id = ii.product_id
    ;;
  }

  # Row-count measure, intentionally left disabled:
  # measure: count {
  #   type: count
  #   drill_fields: [detail*]
  # }

  dimension: user_id {
    type: number
    sql: ${TABLE}.user_id ;;
  }

  dimension: prod_id {
    type: number
    sql: ${TABLE}.prod_id ;;
  }

  dimension: order_id {
    type: number
    sql: ${TABLE}.order_id ;;
  }
}
#################################################
# Table that counts the total number of times each product id has been purchased
# One row per product id with prod_freq = the number of order_items rows that
# resolve to that product (order_items -> inventory_items -> products).
# Both lookups are LEFT JOINs, so order items without a matching product row
# are grouped together under a NULL prod_id.
view: total_order_product {
  derived_table: {
    persist_for: "24 hours" ## Best practice would be to use `datagroup_trigger: ecommerce_etl` but we don't here for snowflake costs
    sql:
      SELECT
          p.id AS prod_id
        , COUNT(*) AS prod_freq
      FROM order_items oi
      LEFT JOIN inventory_items ii
        ON oi.inventory_item_id = ii.id
      LEFT JOIN products p
        ON ii.product_id = p.id
      GROUP BY p.id
    ;;
  }

  # measure: count {
  #   type: count
  #   drill_fields: [detail*]
  # }

  # Typed as a number, matching user_order_product.prod_id, which selects the
  # same products.id column (an untyped dimension defaults to string).
  dimension: prod_id {
    type: number
    sql: ${TABLE}.prod_id ;;
  }

  dimension: prod_freq {
    type: number
    sql: ${TABLE}.prod_freq ;;
  }
}