Skip to content

Commit aace0fc

Browse files
author
tiffanychu90
committed
cast to epsg:3310 and rename geom columns to be clear
1 parent df99df7 commit aace0fc

File tree

8 files changed

+112
-87
lines changed

8 files changed

+112
-87
lines changed

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ build_portfolio_site:
1111
git add portfolio/$(site)/*.yml portfolio/$(site)/*.md
1212
git add portfolio/$(site)/*.ipynb
1313
git add portfolio/sites/$(site).yml
14-
#make production_portfolio
14+
make production_portfolio
1515

1616

1717
build_competitive_corridors:
@@ -39,8 +39,8 @@ build_ntd_report:
3939
make build_portfolio_site
4040

4141
build_route_speeds:
42-
$(eval override site = route_speeds)
43-
cd rt_segment_speeds / && make pip install -r requirements.txt && cd ..
42+
$(eval export site = route_speeds)
43+
cd rt_segment_speeds / && pip install -r requirements.txt && cd ..
4444
cd rt_segment_speeds/ && python deploy_portfolio_yaml.py && cd ..
4545
make build_portfolio_site
4646

_shared_utils/shared_utils/rt_dates.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
44
GCS: gs://calitp-analytics-data/data-analyses/rt_delay/cached_views/
55
"""
6+
from typing import Literal
7+
68
# HQTAs and RT speedmaps
79
DATES = {
810
"feb2022": "2022-02-08",
@@ -57,8 +59,16 @@
5759

5860
y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]
5961

60-
apr_week = [v for k, v in DATES.items() if "apr2023" in k]
61-
oct_week = [v for k, v in DATES.items() if "oct2023" in k]
62+
63+
def get_week(month: Literal["apr2023", "oct2023"], exclude_wed: bool) -> list:
    """
    Return the DATES values whose key contains `month`.

    exclude_wed=True drops the key that ends with `month` itself
    (presumably the Wednesday entry, going by the flag name -- confirm
    against DATES); exclude_wed=False keeps every matching key.
    """
    if exclude_wed:
        # Keys ending in the bare month string are filtered out; keys that
        # carry a suffix after the month are kept.
        return [
            v for k, v in DATES.items()
            if month in k and not k.endswith(month)
        ]

    return [v for k, v in DATES.items() if month in k]
68+
69+
70+
apr_week = get_week(month="apr2023", exclude_wed=False)
71+
oct_week = get_week(month="oct2023", exclude_wed=False)
6272

6373

6474
# Planning and Modal Advisory Committee (PMAC) - quarterly

rt_segment_speeds/scripts/average_speeds.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ def multi_day_averages(analysis_date_list: list, dict_inputs: dict):
366366
columns = col_order + ["route_name", "geometry"]
367367
)
368368

369-
utils_to_add.geoparquet_gcs_export(
369+
utils.geoparquet_gcs_export(
370370
route_dir_avg,
371371
SEGMENT_GCS,
372372
f"{ROUTE_DIR_FILE}_{time_span_str}"
@@ -432,15 +432,14 @@ def stage_open_data_exports(analysis_date: str, dict_inputs: dict):
432432

433433
logger.info(f"average rollups for {analysis_date}: {end - start}")
434434

435-
'''
436-
for month in ["apr2023", "oct2023"]:
437-
start = datetime.datetime.now()
438-
439-
one_week = [v for k, v in rt_dates.DATES.items() if month in k]
440435

436+
for one_week in [rt_dates.oct_week, rt_dates.apr_week]:
437+
start = datetime.datetime.now()
438+
441439
multi_day_averages(one_week, STOP_SEG_DICT)
442440
end = datetime.datetime.now()
443441

444442
logger.info(f"average rollups for {one_week}: {end - start}")
445-
'''
443+
444+
446445

rt_segment_speeds/scripts/config.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ stop_segments:
1616
rt_stop_times:
1717
stage1: "vp_usable"
1818
stage2: "nearest/nearest_vp_rt_stop_times"
19+
stage3: "stop_arrivals_rt_stop_times"
20+
stage4: "speeds_rt_stop_times"
1921
segments_file: "segment_options/stop_segments"
2022
road_segments:
2123
stage1: "vp_usable"

rt_segment_speeds/scripts/interpolate_stop_arrival.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,25 @@
1818
def project_points_onto_shape(
1919
stop_geometry: shapely.Point,
2020
vp_coords_trio: shapely.LineString,
21-
shape_geometry: shapely.Geometry,
21+
shape_geometry: shapely.LineString,
2222
timestamp_arr: np.ndarray,
2323
) -> tuple[float]:
2424
"""
2525
Project the points in the vp trio against the shape geometry
2626
and the stop position onto the shape geometry.
2727
Use np.interp to find interpolated arrival time
2828
"""
29-
stop_position = shape_geometry.project(stop_geometry)
30-
29+
stop_position = vp_coords_trio.project(stop_geometry)
30+
stop_meters = shape_geometry.project(stop_geometry)
31+
3132
points = [shapely.Point(p) for p in vp_coords_trio.coords]
32-
xp = np.asarray([shape_geometry.project(p) for p in points])
33+
xp = np.asarray([vp_coords_trio.project(p) for p in points])
3334

3435
yp = timestamp_arr.astype("datetime64[s]").astype("float64")
3536

3637
interpolated_arrival = np.interp(stop_position, xp, yp)
37-
38-
return stop_position, interpolated_arrival
38+
39+
return stop_meters, interpolated_arrival
3940

4041

4142
def interpolate_stop_arrivals(
@@ -58,6 +59,7 @@ def interpolate_stop_arrivals(
5859
)
5960

6061
df = df.assign(
62+
stop_geometry = df.stop_geometry.to_crs(PROJECT_CRS),
6163
vp_coords_trio = df.vp_coords_trio.to_crs(PROJECT_CRS)
6264
)
6365

@@ -96,7 +98,8 @@ def interpolate_stop_arrivals(
9698
arrival_time = stop_arrival_series,
9799
).astype({"arrival_time": "datetime64[s]"})[
98100
["trip_instance_key", "shape_array_key",
99-
"stop_sequence", "stop_id", "stop_meters",
101+
"stop_sequence", "stop_id",
102+
"stop_meters",
100103
"arrival_time"
101104
]]
102105

@@ -121,8 +124,10 @@ def interpolate_stop_arrivals(
121124
level="INFO")
122125

123126
STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")
124-
127+
RT_STOP_TIMES_DICT = helpers.get_parameters(CONFIG_PATH, "rt_stop_times")
128+
125129
for analysis_date in analysis_date_list:
126130
interpolate_stop_arrivals(analysis_date, STOP_SEG_DICT)
131+
#interpolate_stop_arrivals(analysis_date, RT_STOP_TIMES_DICT)
127132

128133

rt_segment_speeds/scripts/nearest_vp_to_stop.py

Lines changed: 56 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Find nearest_vp_idx to the stop position
33
using scipy KDTree.
44
"""
5+
import dask.dataframe as dd
6+
import dask_geopandas as dg
57
import datetime
68
import geopandas as gpd
79
import numpy as np
@@ -16,6 +18,11 @@
1618
from segment_speed_utils import helpers, neighbor
1719
from segment_speed_utils.project_vars import SEGMENT_GCS
1820

21+
stop_time_col_order = [
22+
'trip_instance_key', 'shape_array_key',
23+
'stop_sequence', 'stop_id', 'stop_pair',
24+
'stop_primary_direction', 'geometry'
25+
]
1926

2027
def add_nearest_neighbor_result(
2128
gdf: gpd.GeoDataFrame,
@@ -31,11 +38,11 @@ def add_nearest_neighbor_result(
3138
f"{SEGMENT_GCS}condensed/vp_condensed_{analysis_date}.parquet",
3239
columns = ["trip_instance_key", "vp_idx",
3340
"location_timestamp_local",
34-
"geometry"]
41+
"geometry"],
3542
).rename(columns = {
3643
"vp_idx": "trip_vp_idx",
3744
"geometry": "trip_geometry"
38-
})
45+
}).set_geometry("trip_geometry").to_crs(WGS84)
3946

4047
gdf2 = pd.merge(
4148
gdf,
@@ -52,48 +59,40 @@ def add_nearest_neighbor_result(
5259
coords_trio_series = []
5360

5461
# Iterate through and find the nearest_vp_idx, then surrounding trio
62+
nearest_vp_idx = np.vectorize(neighbor.add_nearest_vp_idx)(
63+
gdf2.vp_geometry, gdf2.stop_geometry, gdf2.vp_idx
64+
)
65+
66+
gdf2 = gdf2.assign(
67+
nearest_vp_idx = nearest_vp_idx,
68+
).drop(
69+
columns = ["vp_idx", "vp_geometry"]
70+
)
71+
5572
for row in gdf2.itertuples():
56-
nearest_vp = neighbor.add_nearest_vp_idx(
57-
getattr(row, "geometry"),
58-
getattr(row, "stop_geometry"),
59-
getattr(row, "vp_idx")
60-
)
61-
62-
vp_idx_arr = np.asarray(getattr(row, "trip_vp_idx"))
63-
timestamp_arr = np.asarray(getattr(row, "location_timestamp_local"))
64-
coords_arr = np.asarray(getattr(row, "trip_geometry").coords)
65-
6673
vp_trio, time_trio, coords_trio = neighbor.add_trio(
67-
nearest_vp,
74+
getattr(row, "nearest_vp_idx"),
6875
np.asarray(getattr(row, "trip_vp_idx")),
6976
np.asarray(getattr(row, "location_timestamp_local")),
70-
np.array(getattr(row, "trip_geometry").coords),
77+
np.asarray(getattr(row, "trip_geometry").coords),
7178
)
7279

73-
nearest_vp_idx_series.append(nearest_vp)
74-
trio_line = shapely.LineString(coords_trio)
7580
vp_trio_series.append(vp_trio)
7681
time_trio_series.append(time_trio)
77-
coords_trio_series.append(trio_line)
78-
79-
80-
gdf2 = gdf2.assign(
81-
nearest_vp_idx = nearest_vp_idx_series,
82-
vp_idx_trio = vp_trio_series,
83-
location_timestamp_local_trio = time_trio_series,
84-
vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)
85-
)
86-
82+
coords_trio_series.append(shapely.LineString(coords_trio))
83+
8784
drop_cols = [
88-
"vp_idx", "geometry",
8985
"location_timestamp_local",
9086
"trip_vp_idx", "trip_geometry"
9187
]
9288

93-
gdf2 = gdf2.drop(columns = drop_cols)
89+
gdf2 = gdf2.assign(
90+
vp_idx_trio = vp_trio_series,
91+
location_timestamp_local_trio = time_trio_series,
92+
vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)
93+
).drop(columns = drop_cols)
9494

95-
del nearest_vp_idx_series, vp_trio_series
96-
del time_trio_series, coords_trio_series
95+
del vp_trio_series, time_trio_series, coords_trio_series
9796

9897
return gdf2
9998

@@ -111,14 +110,14 @@ def nearest_neighbor_rt_stop_times(
111110

112111
stop_times = helpers.import_scheduled_stop_times(
113112
analysis_date,
114-
columns = ["trip_instance_key",
113+
columns = ["trip_instance_key", "shape_array_key",
115114
"stop_sequence", "stop_id", "stop_pair",
116115
"stop_primary_direction",
117116
"geometry"],
118117
with_direction = True,
119118
get_pandas = True,
120119
crs = WGS84
121-
)
120+
).reindex(columns = stop_time_col_order)
122121

123122
gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
124123
stop_times, analysis_date)
@@ -154,45 +153,48 @@ def nearest_neighbor_shape_segments(
154153
EXPORT_FILE = dict_inputs["stage2"]
155154
SEGMENT_FILE = dict_inputs["segments_file"]
156155

157-
subset_trips = pd.read_parquet(
156+
rt_trips = helpers.import_unique_vp_trips(analysis_date)
157+
158+
shape_stop_combinations = pd.read_parquet(
158159
f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet",
159-
columns = ["st_trip_instance_key"]
160-
).st_trip_instance_key.unique()
160+
columns = ["trip_instance_key",
161+
"stop_id1", "stop_pair",
162+
"st_trip_instance_key"],
163+
filters = [[("trip_instance_key", "in", rt_trips)]]
164+
).rename(columns = {"stop_id1": "stop_id"})
165+
166+
subset_trips = shape_stop_combinations.st_trip_instance_key.unique()
161167

162168
stops_to_use = helpers.import_scheduled_stop_times(
163169
analysis_date,
164170
columns = ["trip_instance_key", "shape_array_key",
165-
"stop_sequence", "stop_id", "stop_pair",
166-
"stop_primary_direction",
167-
"geometry"],
171+
"stop_sequence", "stop_id", "stop_pair",
172+
"stop_primary_direction", "geometry"],
168173
filters = [[("trip_instance_key", "in", subset_trips)]],
169174
get_pandas = True,
170175
with_direction = True
171176
).rename(columns = {"trip_instance_key": "st_trip_instance_key"})
172177

173-
all_trips = helpers.import_scheduled_stop_times(
174-
analysis_date,
175-
columns = ["trip_instance_key", "shape_array_key"],
176-
get_pandas = True,
177-
with_direction = True
178-
).drop_duplicates().reset_index(drop=True)
179-
180178
stop_times = pd.merge(
181179
stops_to_use,
182-
all_trips,
183-
on = "shape_array_key",
180+
shape_stop_combinations,
181+
on = ["st_trip_instance_key", "stop_id", "stop_pair"],
184182
how = "inner"
183+
).drop(
184+
columns = "st_trip_instance_key"
185+
).drop_duplicates().reset_index(drop=True).reindex(
186+
columns = stop_time_col_order
185187
)
186188

189+
del stops_to_use, shape_stop_combinations
190+
187191
gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
188192
stop_times, analysis_date)
189-
190-
del stop_times, all_trips, stops_to_use
191-
192-
results = add_nearest_neighbor_result(gdf, analysis_date)
193193

194-
del gdf
194+
results = add_nearest_neighbor_result(gdf, analysis_date)
195195

196+
del stop_times, gdf
197+
196198
utils.geoparquet_gcs_export(
197199
results,
198200
SEGMENT_GCS,
@@ -206,7 +208,7 @@ def nearest_neighbor_shape_segments(
206208
del results
207209

208210
return
209-
211+
210212

211213
if __name__ == "__main__":
212214

@@ -223,5 +225,5 @@ def nearest_neighbor_shape_segments(
223225

224226
for analysis_date in analysis_date_list:
225227
nearest_neighbor_shape_segments(analysis_date, STOP_SEG_DICT)
226-
nearest_neighbor_rt_stop_times(analysis_date, RT_STOP_TIMES_DICT)
228+
#nearest_neighbor_rt_stop_times(analysis_date, RT_STOP_TIMES_DICT)
227229

rt_segment_speeds/scripts/stop_arrivals_to_speed.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,5 +147,4 @@ def calculate_speed_from_stop_arrivals(
147147
STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")
148148

149149
for analysis_date in analysis_date_list:
150-
151150
calculate_speed_from_stop_arrivals(analysis_date, STOP_SEG_DICT)

0 commit comments

Comments
 (0)