Skip to content

Commit 05863e0

Browse files
author
tiffanychu90
committed
try sjoin shapes to road segments
1 parent 573ed7d commit 05863e0

File tree

1 file changed

+186
-0
lines changed

1 file changed

+186
-0
lines changed
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "2e515aeb-4ac8-4ecb-a09c-23fb86c7bcab",
6+
"metadata": {},
7+
"source": [
8+
"# Spatial join shapes to roads\n",
9+
"\n",
10+
"* Parallel routes\n",
11+
"\n",
12+
"Instead of joining points (stops, stop arrivals) to roads, spatially join scheduled shapes to buffered road segments, then count how many shapes and trips run along each segment."
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": null,
18+
"id": "876a573f-a163-4218-b6a0-a8d0f23fc677",
19+
"metadata": {},
20+
"outputs": [],
21+
"source": [
22+
"import dask.dataframe as dd\n",
23+
"import dask_geopandas as dg\n",
24+
"import geopandas as gpd\n",
25+
"import intake\n",
26+
"import pandas as pd\n",
27+
"\n",
28+
"from shared_utils import rt_dates, rt_utils\n",
29+
"from segment_speed_utils import helpers\n",
30+
"from segment_speed_utils.project_vars import (SCHED_GCS, \n",
31+
" SHARED_GCS, \n",
32+
" PROJECT_CRS\n",
33+
" )\n",
34+
" \n",
35+
"catalog = intake.open_catalog(\n",
36+
" \"../_shared_utils/shared_utils/shared_data_catalog.yml\")\n",
37+
"\n",
38+
"analysis_date = rt_dates.DATES[\"sep2023\"]"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": null,
44+
"id": "42c8a487-03bb-48ae-8833-032962b7f62c",
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"primary_secondary = dg.read_parquet(\n",
49+
" f\"{SHARED_GCS}segmented_roads_2020_primarysecondary.parquet\"\n",
50+
")\n",
51+
"\n",
52+
"local = dg.read_parquet(\n",
53+
" f\"{SHARED_GCS}segmented_roads_2020_local.parquet\",\n",
54+
" columns = [i for i in primary_secondary.columns]\n",
55+
")"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"id": "e8fb012d-c94b-4d47-9a2c-7af88045d6c0",
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
"# Stack primary/secondary and local road segments into one frame.\n",
"# dd.concat is the public API; avoid reaching into the dd.multi submodule.\n",
"road_segments = dd.concat(\n",
"    [primary_secondary, local], \n",
"    axis=0\n",
").reset_index(drop=True).repartition(npartitions=10)"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": null,
74+
"id": "d3ef08ab-ba37-47d6-a767-9515aa657336",
75+
"metadata": {},
76+
"outputs": [],
77+
"source": [
78+
"shapes = helpers.import_scheduled_shapes(\n",
79+
" analysis_date,\n",
80+
" columns = [\"shape_array_key\", \"n_trips\", \"geometry\"],\n",
81+
" get_pandas = True,\n",
82+
" crs = PROJECT_CRS\n",
83+
")"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"id": "c93729ab-fb86-41f3-86f1-ca021e506b14",
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
"def spatial_join_by_partition(road_segments, shapes, buffer_distance=35):\n",
"    \"\"\"\n",
"    Spatially join one partition of road segments to shapes.\n",
"\n",
"    Each road segment geometry is buffered by `buffer_distance` and\n",
"    inner-joined to every shape it intersects. The `index_right` and\n",
"    `geometry` columns are dropped, so only attribute columns from\n",
"    both inputs remain.\n",
"\n",
"    Args:\n",
"        road_segments: road segment geodataframe (a single partition\n",
"            when called through `map_partitions`).\n",
"        shapes: geodataframe of shapes to join against.\n",
"        buffer_distance: buffer size in the units of the road\n",
"            segments' CRS. Defaults to 35, the value that was\n",
"            previously hard-coded.\n",
"            NOTE(review): presumably meters -- confirm PROJECT_CRS is\n",
"            a projected CRS and that both inputs share it.\n",
"\n",
"    Returns:\n",
"        One row per intersecting (road segment, shape) pair.\n",
"    \"\"\"\n",
"    buffered_roads = road_segments.assign(\n",
"        geometry = road_segments.geometry.buffer(buffer_distance)\n",
"    )\n",
"\n",
"    joined = gpd.sjoin(\n",
"        buffered_roads,\n",
"        shapes,\n",
"        how = \"inner\",\n",
"        predicate = \"intersects\"\n",
"    ).drop(columns = [\"index_right\", \"geometry\"])\n",
"\n",
"    return joined\n",
106+
" "
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": null,
112+
"id": "63711923-9035-4597-b162-19b2dbece88d",
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"road_cols = [\n",
117+
" \"linearid\", \"mtfcc\", \n",
118+
" \"fullname\", \"segment_sequence\", \n",
119+
" \"primary_direction\"\n",
120+
"]\n",
121+
"\n",
122+
"road_dtypes = road_segments[road_cols].dtypes.to_dict()\n",
123+
"shape_dtypes = shapes[[\"shape_array_key\", \"n_trips\"]].dtypes.to_dict()\n",
124+
"\n",
125+
"sjoin_results = road_segments.map_partitions(\n",
126+
" spatial_join_by_partition,\n",
127+
" shapes,\n",
128+
" meta = {\n",
129+
" **road_dtypes,\n",
130+
" **shape_dtypes\n",
131+
" },\n",
132+
" align_dataframes = False,\n",
133+
").persist()"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": null,
139+
"id": "cbd685c6-22ba-4d29-bfaa-c46a6e7c5b7e",
140+
"metadata": {},
141+
"outputs": [],
142+
"source": [
143+
"trips_per_segment = (sjoin_results.groupby(road_cols, \n",
144+
" observed=True, group_keys=False)\n",
145+
" .agg({\n",
146+
" \"shape_array_key\": \"count\",\n",
147+
" \"n_trips\": \"sum\"\n",
148+
" }).reset_index()\n",
149+
" ).compute()"
150+
]
151+
},
152+
{
153+
"cell_type": "code",
154+
"execution_count": null,
155+
"id": "dd3e3963-78f3-489e-8bc2-8d25185d3c1d",
156+
"metadata": {},
157+
"outputs": [],
158+
"source": [
159+
"trips_per_segment.to_parquet(\n",
160+
" f\"{SCHED_GCS}trips_road_segments_{analysis_date}.parquet\"\n",
161+
")"
162+
]
163+
}
164+
],
165+
"metadata": {
166+
"kernelspec": {
167+
"display_name": "Python 3 (ipykernel)",
168+
"language": "python",
169+
"name": "python3"
170+
},
171+
"language_info": {
172+
"codemirror_mode": {
173+
"name": "ipython",
174+
"version": 3
175+
},
176+
"file_extension": ".py",
177+
"mimetype": "text/x-python",
178+
"name": "python",
179+
"nbconvert_exporter": "python",
180+
"pygments_lexer": "ipython3",
181+
"version": "3.9.13"
182+
}
183+
},
184+
"nbformat": 4,
185+
"nbformat_minor": 5
186+
}

0 commit comments

Comments
 (0)