Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cut 2 mile road segments, prep for route typologies #1062

Merged
merged 14 commits into from
Mar 27, 2024
3 changes: 2 additions & 1 deletion _shared_utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-e .
gtfs-segments==0.1.0
pyairtable==2.2.2
great-tables==0.3.1
great-tables==0.4.0
polars==0.20.16
28 changes: 28 additions & 0 deletions gtfs_digest/extra_cleaning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Operators, identified by their "... Schedule" name, whose route names
# need to be parsed.
# Tentative approach: keep just the long-name portion of the route name.
# Open question: also apply titlecase? This is finicky, because some are CC
# (which we don't want titlecased).
operators_only_route_long_name = [
"Antelope Valley Transit Authority Schedule",
"Bay Area 511 ACE Schedule",
"Bay Area 511 Caltrain Schedule",
"Bay Area 511 Emery Go-Round Schedule",
"Bay Area 511 Petaluma Schedule",
"Beach Cities GMV Schedule",
"Bear Schedule",
"Commerce Schedule",
"Elk Grove Schedule",
"Humboldt Schedule",
"LA DOT Schedule",
"Lawndale Beat GMV Schedule",
"Redding Schedule",
"Redwood Coast Schedule",
"Santa Maria Schedule",
"StanRTA Schedule",
"VCTC GMV Schedule",
"Victor Valley GMV Schedule",
"Visalia Schedule",
"Yolobus Schedule",
]

# BruinBus Schedule - nothing shows up in route stats
# Why do "StanRTA Schedule" and "Tahoe Transportation District Schedule" appear to have similar route names?
1 change: 1 addition & 0 deletions gtfs_funnel/logs/download_data.log
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,4 @@
2024-03-14 11:43:06.291 | INFO | __main__:download_one_day:29 - # operators to run: 172
2024-03-14 11:43:06.291 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-03-14 11:44:27.599 | INFO | __main__:download_one_day:56 - execution time: 0:01:22.625555
2024-03-19 16:50:51.742 | INFO | __main__:download_one_year:35 - execution time: 0:00:43.062868
48 changes: 27 additions & 21 deletions gtfs_funnel/stop_times_with_direction.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@ def prep_scheduled_stop_times(analysis_date: str) -> gpd.GeoDataFrame:
"""
stops = helpers.import_scheduled_stops(
analysis_date,
columns = ["feed_key", "stop_id", "geometry"],
columns = ["feed_key", "stop_id", "stop_name", "geometry"],
crs = PROJECT_CRS,
get_pandas = True
)

stop_times = helpers.import_scheduled_stop_times(
analysis_date,
columns = ["feed_key", "trip_id", "stop_id", "stop_sequence"]
).compute()
columns = ["feed_key", "trip_id", "stop_id", "stop_sequence"],
get_pandas = True
)

trips = helpers.import_scheduled_trips(
analysis_date,
Expand All @@ -53,7 +54,7 @@ def prep_scheduled_stop_times(analysis_date: str) -> gpd.GeoDataFrame:
stops,
on = ["feed_key", "stop_id"],
how = "inner"
).drop(columns = ["feed_key", "trip_id"])
).drop(columns = ["trip_id"])

st_with_stop = gpd.GeoDataFrame(
st_with_stop, geometry = "geometry", crs = PROJECT_CRS)
Expand Down Expand Up @@ -83,13 +84,15 @@ def get_projected_stop_meters(
return gdf


def find_prior_stop(
def find_prior_subseq_stop(
stop_times: gpd.GeoDataFrame,
trip_stop_cols: list
) -> gpd.GeoDataFrame:
"""
For trip-stop, find the previous stop (using stop sequence).
Attach the previous stop's geometry.
This will determine the direction for the stop (it's from prior stop).
Add in subseq stop information too.
"""
prior_stop = stop_times[trip_stop_cols].sort_values(
trip_stop_cols).reset_index(drop=True)
Expand All @@ -105,6 +108,9 @@ def find_prior_stop(
subseq_stop_id = (prior_stop.groupby("trip_instance_key")
.stop_id
.shift(-1)),
subseq_stop_name = (prior_stop.groupby("trip_instance_key")
.stop_name
.shift(-1))
)

# Merge in prior stop geom as a separate column so we can
Expand Down Expand Up @@ -133,18 +139,25 @@ def find_prior_stop(
).astype({
"prior_stop_sequence": "Int64",
"subseq_stop_sequence": "Int64"
}).fillna({"subseq_stop_id": ""})


}).fillna({
"subseq_stop_id": "",
"subseq_stop_name": ""
})

# Create stop pair with underscores, since stop_id
# can contain hyphens
stop_times_with_prior_geom = stop_times_with_prior_geom.assign(
stop_pair = stop_times_with_prior_geom.apply(
lambda x:
str(x.stop_id) + "__" + str(x.subseq_stop_id),
axis=1,
)
).drop(columns = ["subseq_stop_id"])
),
stop_pair_name = stop_times_with_prior_geom.apply(
lambda x:
x.stop_name + "__" + x.subseq_stop_name,
axis=1,
),
).drop(columns = ["subseq_stop_id", "subseq_stop_name"])

return stop_times_with_prior_geom

Expand All @@ -167,9 +180,10 @@ def assemble_stop_times_with_direction(

scheduled_stop_times = prep_scheduled_stop_times(analysis_date)

trip_stop_cols = ["trip_instance_key", "stop_sequence", "stop_id"]
trip_stop_cols = ["trip_instance_key", "stop_sequence",
"stop_id", "stop_name"]

scheduled_stop_times2 = find_prior_stop(
scheduled_stop_times2 = find_prior_subseq_stop(
scheduled_stop_times, trip_stop_cols
)

Expand All @@ -195,11 +209,6 @@ def assemble_stop_times_with_direction(
rt_utils.primary_cardinal_direction)(prior_geom, current_geom)
stop_distance = prior_geom.distance(current_geom)

# Create a column with normalized direction vector
# Add this because some bus can travel in southeasterly direction,
# but it's categorized as southbound or eastbound depending
# on whether south or east value is larger.

other_stops_no_geom = other_stops_no_geom.assign(
stop_primary_direction = stop_direction,
stop_meters = stop_distance,
Expand All @@ -224,10 +233,7 @@ def assemble_stop_times_with_direction(

end = datetime.datetime.now()
print(f"execution time: {end - start}")

del scheduled_stop_times, scheduled_stop_times2
del other_stops_no_geom, scheduled_stop_times_with_direction, df


return


Expand Down
11 changes: 5 additions & 6 deletions rt_segment_speeds/logs/cut_road_segments.log
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
2023-12-11 18:49:41.963 | INFO | __main__:<module>:178 - execution time: 0:02:12.275031
2023-12-12 12:38:25.692 | INFO | __main__:<module>:282 - cut local roads: 0:13:31.749643
2023-12-12 12:38:25.740 | INFO | __main__:<module>:285 - execution time: 0:13:31.797462
2023-12-12 12:43:57.178 | INFO | __main__:<module>:261 - cut primary/secondary roads: 0:03:51.547346
2023-12-12 13:01:51.518 | INFO | __main__:<module>:61 - concatenate road segments: 0:00:51.244157
2023-12-12 14:35:58.485 | INFO | __main__:<module>:62 - concatenate road segments: 0:00:45.026472
2024-03-21 16:53:55.732 | INFO | __main__:<module>:236 - cut primary/secondary roads: 0:00:25.924757
2024-03-21 17:03:42.871 | INFO | __main__:<module>:255 - cut local roads: 0:09:47.138963
2024-03-21 17:03:42.871 | INFO | __main__:<module>:258 - execution time: 0:10:13.064427
2024-03-22 14:00:01.019 | INFO | __main__:<module>:184 - execution time: 0:05:50.663597
2024-03-22 14:19:25.092 | INFO | __main__:<module>:196 - execution time: 0:08:45.006011
Loading
Loading