-
Notifications
You must be signed in to change notification settings - Fork 1
/
nyc_yellow_table_creation.sql
108 lines (91 loc) · 3.59 KB
/
nyc_yellow_table_creation.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
-- Assume that all attributes are text based
-- This is for TEST
CREATE TABLE trips_text(
id serial NOT NULL,
vendorid text,
t_pickup text,
t_dropoff text,
num_passengers text,
trip_distance text,
l_pickup_lon text,
l_pickup_lat text,
ratecodeid text,
flag_store text,
l_dropoff_lon text,
l_dropoff_lat text,
payment_type text,
fare_amount text,
extra text,
mta_tax text,
surcharge text,
tip text,
tolls text,
total text,
constraint "PK_text" PRIMARY KEY (id))
-- Optimal trip table design - staging table
CREATE TABLE trips_staging(
id serial NOT NULL,
vendorid character varying(1),
t_pickup timestamp without time zone,
t_dropoff timestamp without time zone,
num_passengers smallint,
trip_distance real,
l_pickup_lon double precision,
l_pickup_lat double precision,
ratecodeid character(2),
flag_store character(1),
l_dropoff_lon double precision,
l_dropoff_lat double precision,
payment_type character(1),
fare_amount real,
extra real,
mta_tax real,
surcharge real,
tip real,
tolls real,
total real,
constraint "PK" PRIMARY KEY (id))
-- Create the pickup and dropoff geometry column - production table
CREATE TABLE trips(
id serial NOT NULL,
vendorid character varying(1),
t_pickup timestamp without time zone,
t_dropoff timestamp without time zone,
num_passengers smallint,
trip_distance real,
l_pickup_lon double precision,
l_pickup_lat double precision,
ratecodeid character(2),
flag_store character(1),
l_dropoff_lon double precision,
l_dropoff_lat double precision,
payment_type character(1),
fare_amount real,
extra real,
mta_tax real,
surcharge real,
tip real,
tolls real,
total real,
l_pickup geometry(point, 4326),
l_dropoff geometry(point, 4326),
constraint "PK_2" PRIMARY KEY (id)
)
-- Insert into the new taxi table and generate the geometry
insert into trips(id, vendorid, t_pickup, t_dropoff, num_passengers,
trip_distance, l_pickup_lon, l_pickup_lat, ratecodeid, flag_store,
l_dropoff_lon, l_dropoff_lat, payment_type, fare_amount, extra, mta_tax,
surcharge, tip, tolls, total,
l_pickup, l_dropoff)
select id, vendorid, t_pickup, t_dropoff, num_passengers,
trip_distance, l_pickup_lon, l_pickup_lat, ratecodeid, flag_store,
l_dropoff_lon, l_dropoff_lat, payment_type, fare_amount, extra, mta_tax,
surcharge, tip, tolls, total,
ST_SetSRID(ST_Point(l_pickup_lon,l_pickup_lat),4326),
ST_SetSRID(ST_Point(l_dropoff_lon,l_dropoff_lat),4326)
from trips_staging
-- Sample k-NN
SELECT id
FROM trips
ORDER BY l_pickup <-> (select l_pickup from trips where id = 11000000)
limit 10