diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index 39b6725..1ebba6b 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -13,7 +13,9 @@ def create_transit_net(gtfsfeeds_dfs, day, timerange, + timerange_pad=None, calendar_dates_lookup=None, + time_aware=False, overwrite_existing_stop_times_int=False, use_existing_stop_times_int=False, save_processed_gtfs=False, @@ -41,6 +43,9 @@ def create_transit_net(gtfsfeeds_dfs, day, to represent a relevant travel time period such as a 3 hour window for the AM Peak period. Must follow format of a 24 hour clock for example: 08:00:00 or 17:00:00 + timerange_pad: int + integer indicating the number of hours to pad + after the end of the time interval calendar_dates_lookup : dict, optional dictionary of the lookup column (key) as a string and corresponding string (value) as string or list of strings to use to subset trips @@ -49,6 +54,9 @@ def create_transit_net(gtfsfeeds_dfs, day, that are not in the calendar DataFrame. Note search will select all records that meet each key value pair criteria. Example: {'schedule_type' : 'WD'} or {'schedule_type' : ['WD', 'SU']} + time_aware: boolean + Boolean to indicate whether the transit network should include + time information. 
overwrite_existing_stop_times_int : bool, optional if true, and if there is an existing stop_times_int DataFrame stored in the gtfsfeeds_dfs object it will be @@ -155,7 +163,8 @@ def create_transit_net(gtfsfeeds_dfs, day, selected_interpolated_stop_times_df = _time_selector( df=gtfsfeeds_dfs.stop_times_int, starttime=timerange[0], - endtime=timerange[1]) + endtime=timerange[1], + timerange_pad=timerange_pad) final_edge_table = _format_transit_net_edge( stop_times_df=selected_interpolated_stop_times_df[['unique_trip_id', @@ -177,7 +186,8 @@ def create_transit_net(gtfsfeeds_dfs, day, transit_nodes = _format_transit_net_nodes(df=final_selected_stops) transit_edges = _route_type_to_edge(transit_edge_df=transit_edges, - stop_time_df=gtfsfeeds_dfs.stop_times) + stop_time_df=gtfsfeeds_dfs.stop_times, + time_aware=time_aware) transit_edges = _route_id_to_edge(transit_edge_df=transit_edges, trips_df=gtfsfeeds_dfs.trips) @@ -661,7 +671,7 @@ def _time_difference(stop_times_df): return stop_times_df -def _time_selector(df, starttime, endtime): +def _time_selector(df, starttime, endtime, timerange_pad=None): """ Select stop times that fall within a specified time range @@ -673,6 +683,9 @@ def _time_selector(df, starttime, endtime): 24 hour clock formatted time 1 endtime : str 24 hour clock formatted time 2 + timerange_pad: int + integer indicating the number of hours to pad + after the end of the time interval Returns ------- selected_stop_timesdf : pandas.DataFrame @@ -698,10 +711,12 @@ def _time_selector(df, starttime, endtime): end_s = int(str(endtime[6:8])) endtime_sec = (end_h * 60 * 60) + (end_m * 60) + end_s + # define variable for including stops active after end of timerange + pad = int(0 if timerange_pad is None else timerange_pad) # create df of stops times that are within the requested range selected_stop_timesdf = df[( - (starttime_sec < df["departure_time_sec_interpolate"]) & ( - df["departure_time_sec_interpolate"] < endtime_sec))] + (starttime_sec <= 
df["departure_time_sec_interpolate"]) & ( + df["departure_time_sec_interpolate"] <= endtime_sec + (pad * 3600)))] log( 'Stop times from {} to {} successfully selected {:,} records out of ' @@ -896,7 +911,7 @@ def _format_transit_net_nodes(df): return final_node_df -def _route_type_to_edge(transit_edge_df, stop_time_df): +def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False): """ Append route type information to transit edge table @@ -906,6 +921,8 @@ def _route_type_to_edge(transit_edge_df, stop_time_df): transit edge dataframe stop_time_df : pandas.DataFrame stop time dataframe + time_aware: boolean + whether the transit network should include time Returns ------- @@ -919,20 +936,31 @@ def _route_type_to_edge(transit_edge_df, stop_time_df): stop_time_df['trip_id'].str.cat( stop_time_df['unique_agency_id'].astype('str'), sep='_')) - # join route_id to the edge table - merged_df = pd.merge(transit_edge_df, - stop_time_df[['unique_trip_id', 'route_type']], - how='left', on='unique_trip_id', sort=False, - copy=False) - merged_df.drop_duplicates(subset='unique_trip_id', - keep='first', - inplace=True) - # need to get unique records here to have a one to one join - - # this serves as the look up table - # join the look up table created above to the table of interest - transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ - ['route_type', 'unique_trip_id']], how='left', on='unique_trip_id', - sort=False, copy=False) + if time_aware: + # join route_id to the edge table + merged_df = pd.merge(transit_edge_df, + stop_time_df[['unique_trip_id', 'route_type', 'stop_sequence', + 'arrival_time', 'departure_time']], + how='left', + left_on=['unique_trip_id', 'sequence'], + right_on=['unique_trip_id', 'stop_sequence'], sort=False, + copy=False) + + else: + merged_df = pd.merge(transit_edge_df, + stop_time_df[['unique_trip_id', 'route_type']], + how='left', on='unique_trip_id', sort=False, + copy=False) + 
merged_df.drop_duplicates(subset='unique_trip_id', + keep='first', + inplace=True) + # need to get unique records here to have a one to one join - + # this serves as the look up table + # join the look up table created above to the table of interest + transit_edge_df_w_routetype = pd.merge(transit_edge_df, + merged_df[['route_type', 'unique_trip_id']], + how='left', on='unique_trip_id', + sort=False, copy=False) log( 'route type successfully joined to transit edges. Took {:,'