-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter-android-how-to-do-it.sql
104 lines (99 loc) · 2.32 KB
/
filter-android-how-to-do-it.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
-- Params:
-- - {{database}}: Database to be used on table creation
-- [Create Posts Questions Table] (Main query)
CREATE OR REPLACE TABLE
`{{database}}.posts_questions` AS
SELECT
PQ.*
FROM
`bigquery-public-data.stackoverflow.posts_questions` PQ
WHERE
'android' IN UNNEST(SPLIT(PQ.tags, "|")) -- only with actual tag android
AND (PQ.title LIKE '%how%'
OR PQ.body LIKE '%how%') -- it has "how" in the title or body
AND (PQ.body NOT LIKE '%fail%'
AND PQ.body NOT LIKE '%problem%'
AND PQ.body NOT LIKE '%error%'
AND PQ.body NOT LIKE '%wrong%'
AND PQ.body NOT LIKE '%fix%'
AND PQ.body NOT LIKE '%bug%'
AND PQ.body NOT LIKE '%issue%'
AND PQ.body NOT LIKE '%solve%'
AND PQ.body NOT LIKE '%trouble%')-- AND it doesn't have any of the debug-corrective words in the body (“fail”, “problem”, “error”, “wrong”, “fix”, “bug”, “issue”, “solve”, “trouble”)
-- AND it doesn't have "error" in any of the code snippets (also tracked by the above)
ORDER BY
PQ.view_count DESC;
-- [Create Posts Answers Table]
CREATE OR REPLACE TABLE
`{{database}}.posts_answers` AS
SELECT
PA.*
FROM
`bigquery-public-data.stackoverflow.posts_answers` PA
WHERE
PA.parent_id IN (
SELECT
id
FROM
`{{database}}.posts_questions`);
-- [Create Questions Table]
CREATE OR REPLACE TABLE
`{{database}}.comments` AS
SELECT
C.*
FROM
`bigquery-public-data.stackoverflow.comments` C
WHERE
C.post_id IN (
SELECT
id
FROM
`{{database}}.posts_questions`
UNION ALL
SELECT
id
FROM
`{{database}}.posts_answers`);
-- [Create Users Table]
CREATE OR REPLACE TABLE
`{{database}}.users` AS
SELECT
U.*
FROM
`bigquery-public-data.stackoverflow.users` U
WHERE
U.id IN (
SELECT
owner_user_id
FROM
`{{database}}.posts_questions`
UNION ALL
SELECT
owner_user_id
FROM
`{{database}}.posts_answers`
UNION ALL
SELECT
user_id
FROM
`{{database}}.comments`);
-- [Create Tags Table]
CREATE OR REPLACE TABLE `{{database}}.tags` AS
SELECT
T.*
FROM
`bigquery-public-data.stackoverflow.tags` T
WHERE
T.tag_name IN (
WITH
sequences AS (
SELECT
SPLIT(tags, '|') AS tags
FROM
`{{database}}.posts_questions`)
SELECT
DISTINCT flattened_tags
FROM
sequences
CROSS JOIN
UNNEST(sequences.tags) AS flattened_tags)