-
Notifications
You must be signed in to change notification settings - Fork 0
/
station_feature.sql
64 lines (64 loc) · 1.74 KB
/
station_feature.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
INSERT INTO
iceberg.samples.station_feat_30d (
start_station_id,
num_trip,
avg_duration_mins,
std_duration_mins,
mode_subscriber_type,
event_timestamp
) WITH data AS (
SELECT
*
FROM
hive.samples.bikeshare_trips
WHERE
start_time BETWEEN cast('2023-03-25 00:00:00' AS timestamp) - INTERVAL '30' DAY
AND cast('2023-03-25 00:00:00' AS timestamp)
),
mode_sub_agg AS (
SELECT
start_station_id,
subscriber_type AS mode_subscriber_type
FROM
(
SELECT
start_station_id,
subscriber_type,
COUNT(*) AS count,
ROW_NUMBER() OVER (
PARTITION BY start_station_id
ORDER BY
COUNT(*) DESC
) AS rn
FROM
data
GROUP BY
start_station_id,
subscriber_type
) t
WHERE
rn = 1
ORDER BY
start_station_id
),
stats AS (
SELECT
start_station_id,
count(trip_id) AS num_trip,
avg(duration_minutes) AS avg_duration_mins,
stddev(duration_minutes) AS std_duration_mins
FROM
data d
GROUP BY
start_station_id
)
SELECT
stats.start_station_id,
num_trip,
avg_duration_mins,
std_duration_mins,
mode_subscriber_type,
cast('2023-03-25 00:00:00' AS timestamp(6)) AS event_timestamp
FROM
stats
JOIN mode_sub_agg agg ON stats.start_station_id = agg.start_station_id;