Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the transit networks time aware #83

Closed
wants to merge 6 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 48 additions & 20 deletions urbanaccess/gtfs/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

def create_transit_net(gtfsfeeds_dfs, day,
timerange,
timerange_pad=None,
calendar_dates_lookup=None,
time_aware=False,
overwrite_existing_stop_times_int=False,
use_existing_stop_times_int=False,
save_processed_gtfs=False,
Expand Down Expand Up @@ -41,6 +43,9 @@ def create_transit_net(gtfsfeeds_dfs, day,
to represent a relevant travel time period such as a 3 hour window
for the AM Peak period. Must follow format
of a 24 hour clock for example: 08:00:00 or 17:00:00
timerange_pad: int
integer indicating the number of hours to pad
after the end of the time interval
calendar_dates_lookup : dict, optional
dictionary of the lookup column (key) as a string and corresponding
string (value) as string or list of strings to use to subset trips
Expand All @@ -49,6 +54,9 @@ def create_transit_net(gtfsfeeds_dfs, day,
that are not in the calendar DataFrame. Note search will select all
records that meet each key value pair criteria.
Example: {'schedule_type' : 'WD'} or {'schedule_type' : ['WD', 'SU']}
time_aware: boolean
Boolean to indicate whether the transit network should include
time information.
overwrite_existing_stop_times_int : bool, optional
if true, and if there is an existing stop_times_int
DataFrame stored in the gtfsfeeds_dfs object it will be
Expand Down Expand Up @@ -155,7 +163,8 @@ def create_transit_net(gtfsfeeds_dfs, day,
selected_interpolated_stop_times_df = _time_selector(
df=gtfsfeeds_dfs.stop_times_int,
starttime=timerange[0],
endtime=timerange[1])
endtime=timerange[1],
timerange_pad=timerange_pad)

final_edge_table = _format_transit_net_edge(
stop_times_df=selected_interpolated_stop_times_df[['unique_trip_id',
Expand All @@ -177,7 +186,8 @@ def create_transit_net(gtfsfeeds_dfs, day,
transit_nodes = _format_transit_net_nodes(df=final_selected_stops)

transit_edges = _route_type_to_edge(transit_edge_df=transit_edges,
stop_time_df=gtfsfeeds_dfs.stop_times)
stop_time_df=gtfsfeeds_dfs.stop_times,
time_aware=time_aware)

transit_edges = _route_id_to_edge(transit_edge_df=transit_edges,
trips_df=gtfsfeeds_dfs.trips)
Expand Down Expand Up @@ -661,7 +671,7 @@ def _time_difference(stop_times_df):
return stop_times_df


def _time_selector(df, starttime, endtime):
def _time_selector(df, starttime, endtime, timerange_pad=None):
"""
Select stop times that fall within a specified time range

Expand All @@ -673,6 +683,9 @@ def _time_selector(df, starttime, endtime):
24 hour clock formatted time 1
endtime : str
24 hour clock formatted time 2
timerange_pad: int
integer indicating the number of hours to pad
after the end of the time interval
Returns
-------
selected_stop_timesdf : pandas.DataFrame
Expand All @@ -698,10 +711,12 @@ def _time_selector(df, starttime, endtime):
end_s = int(str(endtime[6:8]))
endtime_sec = (end_h * 60 * 60) + (end_m * 60) + end_s

# define variable for including stops active after end of timerange
pad = int(0 if timerange_pad is None else timerange_pad)
# create df of stops times that are within the requested range
selected_stop_timesdf = df[(
(starttime_sec < df["departure_time_sec_interpolate"]) & (
df["departure_time_sec_interpolate"] < endtime_sec))]
(starttime_sec <= df["departure_time_sec_interpolate"]) & (
df["departure_time_sec_interpolate"] <= endtime_sec + (pad * 3600)))]

log(
'Stop times from {} to {} successfully selected {:,} records out of '
Expand Down Expand Up @@ -896,7 +911,7 @@ def _format_transit_net_nodes(df):
return final_node_df


def _route_type_to_edge(transit_edge_df, stop_time_df):
def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False):
"""
Append route type information to transit edge table

Expand All @@ -906,6 +921,8 @@ def _route_type_to_edge(transit_edge_df, stop_time_df):
transit edge dataframe
stop_time_df : pandas.DataFrame
stop time dataframe
time_aware: boolean
whether the transit network should include time

Returns
-------
Expand All @@ -919,20 +936,31 @@ def _route_type_to_edge(transit_edge_df, stop_time_df):
stop_time_df['trip_id'].str.cat(
stop_time_df['unique_agency_id'].astype('str'), sep='_'))

# join route_type to the edge table
merged_df = pd.merge(transit_edge_df,
stop_time_df[['unique_trip_id', 'route_type']],
how='left', on='unique_trip_id', sort=False,
copy=False)
merged_df.drop_duplicates(subset='unique_trip_id',
keep='first',
inplace=True)
# need to get unique records here to have a one to one join -
# this serves as the look up table
# join the look up table created above to the table of interest
transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[
['route_type', 'unique_trip_id']], how='left', on='unique_trip_id',
sort=False, copy=False)
if time_aware:
# join route_type, stop sequence and arrival/departure times to the edge table
merged_df = pd.merge(transit_edge_df,
stop_time_df[['unique_trip_id', 'route_type', 'stop_sequence',
'arrival_time', 'departure_time']],
how='left',
left_on=['unique_trip_id', 'sequence'],
right_on=['unique_trip_id', 'stop_sequence'], sort=False,
copy=False)

else:
merged_df = pd.merge(transit_edge_df,
stop_time_df[['unique_trip_id', 'route_type']],
how='left', on='unique_trip_id', sort=False,
copy=False)
merged_df.drop_duplicates(subset='unique_trip_id',
keep='first',
inplace=True)
# need to get unique records here to have a one to one join -
# this serves as the look up table
# join the look up table created above to the table of interest
transit_edge_df_w_routetype = pd.merge(transit_edge_df,
merged_df[['route_type', 'unique_trip_id']],
how='left', on='unique_trip_id',
sort=False, copy=False)

log(
'route type successfully joined to transit edges. Took {:,'
Expand Down