From 9e3b71b2b7dfac3ec73ed1f8eeb1af800c705643 Mon Sep 17 00:00:00 2001 From: Mohamed Amine Bouzaghrane Date: Mon, 15 Feb 2021 17:29:09 -0500 Subject: [PATCH 1/6] Add time padding at end of time interval This is to be able to include the active stops within a time interval after the end of the specified time range. --- urbanaccess/gtfs/network.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index 39b6725..7253669 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -13,6 +13,7 @@ def create_transit_net(gtfsfeeds_dfs, day, timerange, + timerange_pad=None, calendar_dates_lookup=None, overwrite_existing_stop_times_int=False, use_existing_stop_times_int=False, @@ -697,11 +698,13 @@ def _time_selector(df, starttime, endtime): end_m = int(str(endtime[3:5])) end_s = int(str(endtime[6:8])) endtime_sec = (end_h * 60 * 60) + (end_m * 60) + end_s - + + # define variable for including stops active after end of timerange + pad = int(0 if timerange_pad is None else timerange_pad) # create df of stops times that are within the requested range selected_stop_timesdf = df[( - (starttime_sec < df["departure_time_sec_interpolate"]) & ( - df["departure_time_sec_interpolate"] < endtime_sec))] + (starttime_sec <= df["departure_time_sec_interpolate"]) & ( + df["departure_time_sec_interpolate"] <= endtime_sec + (pad * 3600))] log( 'Stop times from {} to {} successfully selected {:,} records out of ' From 01c65cbea05312cf2e2eb20add8f79fd237c3e24 Mon Sep 17 00:00:00 2001 From: Mohamed Amine Bouzaghrane Date: Sat, 27 Feb 2021 11:53:19 -0600 Subject: [PATCH 2/6] fix closing parenthesis --- urbanaccess/gtfs/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index 7253669..aca63e5 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -704,7 +704,7 @@ def _time_selector(df, 
starttime, endtime): # create df of stops times that are within the requested range selected_stop_timesdf = df[( (starttime_sec <= df["departure_time_sec_interpolate"]) & ( - df["departure_time_sec_interpolate"] <= endtime_sec + (pad * 3600))] + df["departure_time_sec_interpolate"] <= endtime_sec + (pad * 3600)))] log( 'Stop times from {} to {} successfully selected {:,} records out of ' From 8f400718577e694bf47588cadb13b00d07e23f10 Mon Sep 17 00:00:00 2001 From: Mohamed Amine Bouzaghrane Date: Sat, 27 Feb 2021 12:15:46 -0600 Subject: [PATCH 3/6] added time aware functionality Added argument in create transit and route_type_to_edge functions. --- urbanaccess/gtfs/network.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index aca63e5..be0c6a0 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -15,6 +15,7 @@ def create_transit_net(gtfsfeeds_dfs, day, timerange, timerange_pad=None, calendar_dates_lookup=None, + time_aware=False, overwrite_existing_stop_times_int=False, use_existing_stop_times_int=False, save_processed_gtfs=False, @@ -178,7 +179,8 @@ def create_transit_net(gtfsfeeds_dfs, day, transit_nodes = _format_transit_net_nodes(df=final_selected_stops) transit_edges = _route_type_to_edge(transit_edge_df=transit_edges, - stop_time_df=gtfsfeeds_dfs.stop_times) + stop_time_df=gtfsfeeds_dfs.stop_times, + time_aware=time_aware) transit_edges = _route_id_to_edge(transit_edge_df=transit_edges, trips_df=gtfsfeeds_dfs.trips) @@ -899,7 +901,7 @@ def _format_transit_net_nodes(df): return final_node_df -def _route_type_to_edge(transit_edge_df, stop_time_df): +def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False): """ Append route type information to transit edge table @@ -922,18 +924,35 @@ def _route_type_to_edge(transit_edge_df, stop_time_df): stop_time_df['trip_id'].str.cat( 
stop_time_df['unique_agency_id'].astype('str'), sep='_')) - # join route_id to the edge table - merged_df = pd.merge(transit_edge_df, + if time_aware: + # join route_id to the edge table + merged_df = pd.merge(transit_edge_df, + stop_time_df[['unique_trip_id', 'route_type', + 'arrival_time','departure_time']], + how='left', on='unique_trip_id', sort=False, + copy=False) + merged_df.drop_duplicates(subset='unique_trip_id', + keep='first', + inplace=True) + # need to get unique records here to have a one to one join - + # this serves as the look up table + # join the look up table created above to the table of interest + transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ + ['route_type', 'unique_trip_id', + 'arrival_time','departure_time']], how='left', on='unique_trip_id', + sort=False, copy=False) + else: + merged_df = pd.merge(transit_edge_df, stop_time_df[['unique_trip_id', 'route_type']], how='left', on='unique_trip_id', sort=False, copy=False) - merged_df.drop_duplicates(subset='unique_trip_id', + merged_df.drop_duplicates(subset='unique_trip_id', keep='first', inplace=True) - # need to get unique records here to have a one to one join - - # this serves as the look up table - # join the look up table created above to the table of interest - transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ + # need to get unique records here to have a one to one join - + # this serves as the look up table + # join the look up table created above to the table of interest + transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ ['route_type', 'unique_trip_id']], how='left', on='unique_trip_id', sort=False, copy=False) From d6c7edaa9748d0ffc99c7d5df33621f1b136f3c3 Mon Sep 17 00:00:00 2001 From: Mohamed Amine Bouzaghrane Date: Mon, 1 Mar 2021 15:03:08 -0500 Subject: [PATCH 4/6] fix timerange_pad argument + adding documentation --- urbanaccess/gtfs/network.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff 
--git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index be0c6a0..4a8725a 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -43,6 +43,9 @@ def create_transit_net(gtfsfeeds_dfs, day, to represent a relevant travel time period such as a 3 hour window for the AM Peak period. Must follow format of a 24 hour clock for example: 08:00:00 or 17:00:00 + timerange_pad: int + integer indicating the number of hours to pad + after the end of the time interval calendar_dates_lookup : dict, optional dictionary of the lookup column (key) as a string and corresponding string (value) as string or list of strings to use to subset trips @@ -51,6 +54,9 @@ def create_transit_net(gtfsfeeds_dfs, day, that are not in the calendar DataFrame. Note search will select all records that meet each key value pair criteria. Example: {'schedule_type' : 'WD'} or {'schedule_type' : ['WD', 'SU']} + time_aware: boolean + Boolean to indicate whether the transit network should include + time information. 
overwrite_existing_stop_times_int : bool, optional if true, and if there is an existing stop_times_int DataFrame stored in the gtfsfeeds_dfs object it will be @@ -157,7 +163,8 @@ def create_transit_net(gtfsfeeds_dfs, day, selected_interpolated_stop_times_df = _time_selector( df=gtfsfeeds_dfs.stop_times_int, starttime=timerange[0], - endtime=timerange[1]) + endtime=timerange[1], + timerange_pad=timerange_pad) final_edge_table = _format_transit_net_edge( stop_times_df=selected_interpolated_stop_times_df[['unique_trip_id', @@ -180,7 +187,7 @@ def create_transit_net(gtfsfeeds_dfs, day, transit_edges = _route_type_to_edge(transit_edge_df=transit_edges, stop_time_df=gtfsfeeds_dfs.stop_times, - time_aware=time_aware) + time_aware=time_aware) transit_edges = _route_id_to_edge(transit_edge_df=transit_edges, trips_df=gtfsfeeds_dfs.trips) @@ -664,7 +671,7 @@ def _time_difference(stop_times_df): return stop_times_df -def _time_selector(df, starttime, endtime): +def _time_selector(df, starttime, endtime, timerange_pad=None): """ Select stop times that fall within a specified time range @@ -676,6 +683,9 @@ def _time_selector(df, starttime, endtime): 24 hour clock formatted time 1 endtime : str 24 hour clock formatted time 2 + timerange_pad: int + integer indicating the number of hours to pad + after the end of the time interval Returns ------- selected_stop_timesdf : pandas.DataFrame @@ -911,6 +921,8 @@ def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False): transit edge dataframe stop_time_df : pandas.DataFrame stop time dataframe + time_aware: boolean + whether the transit network should include time Returns ------- From 7824a5f2fe47e04322cc56054b8298d78d0b8917 Mon Sep 17 00:00:00 2001 From: Bouzaghrane Date: Mon, 1 Mar 2021 18:40:34 -0500 Subject: [PATCH 5/6] fix merging when time dependent networks are needed. 
--- urbanaccess/gtfs/network.py | 54 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index 4a8725a..34da922 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -710,7 +710,7 @@ def _time_selector(df, starttime, endtime, timerange_pad=None): end_m = int(str(endtime[3:5])) end_s = int(str(endtime[6:8])) endtime_sec = (end_h * 60 * 60) + (end_m * 60) + end_s - + # define variable for including stops active after end of timerange pad = int(0 if timerange_pad is None else timerange_pad) # create df of stops times that are within the requested range @@ -937,36 +937,30 @@ def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False): stop_time_df['unique_agency_id'].astype('str'), sep='_')) if time_aware: - # join route_id to the edge table - merged_df = pd.merge(transit_edge_df, - stop_time_df[['unique_trip_id', 'route_type', - 'arrival_time','departure_time']], - how='left', on='unique_trip_id', sort=False, - copy=False) - merged_df.drop_duplicates(subset='unique_trip_id', - keep='first', - inplace=True) - # need to get unique records here to have a one to one join - - # this serves as the look up table - # join the look up table created above to the table of interest - transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ - ['route_type', 'unique_trip_id', - 'arrival_time','departure_time']], how='left', on='unique_trip_id', - sort=False, copy=False) + # join route_id to the edge table + merged_df = pd.merge(transit_edge_df, + stop_time_df[['unique_trip_id', 'route_type', 'stop_sequence', + 'arrival_time', 'departure_time']], + how='left', + left_on=['unique_trip_id', 'sequence'], + right_on=['unique_trip_id', 'stop_sequence'] sort=False, + copy=False) + else: - merged_df = pd.merge(transit_edge_df, - stop_time_df[['unique_trip_id', 'route_type']], - how='left', on='unique_trip_id', sort=False, - copy=False) - 
merged_df.drop_duplicates(subset='unique_trip_id', - keep='first', - inplace=True) - # need to get unique records here to have a one to one join - - # this serves as the look up table - # join the look up table created above to the table of interest - transit_edge_df_w_routetype = pd.merge(transit_edge_df, merged_df[ - ['route_type', 'unique_trip_id']], how='left', on='unique_trip_id', - sort=False, copy=False) + merged_df = pd.merge(transit_edge_df, + stop_time_df[['unique_trip_id', 'route_type']], + how='left', on='unique_trip_id', sort=False, + copy=False) + merged_df.drop_duplicates(subset='unique_trip_id', + keep='first', + inplace=True) + # need to get unique records here to have a one to one join - + # this serves as the look up table + # join the look up table created above to the table of interest + transit_edge_df_w_routetype = pd.merge(transit_edge_df, + merged_df[['route_type', 'unique_trip_id']], + how='left', on='unique_trip_id', + sort=False, copy=False) log( 'route type successfully joined to transit edges. Took {:,' From 7131306322ad0a121ff2a842726dbdda12d05b9e Mon Sep 17 00:00:00 2001 From: Bouzaghrane Date: Mon, 1 Mar 2021 20:18:32 -0500 Subject: [PATCH 6/6] fix missing comma. --- urbanaccess/gtfs/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py index 34da922..1ebba6b 100644 --- a/urbanaccess/gtfs/network.py +++ b/urbanaccess/gtfs/network.py @@ -943,7 +943,7 @@ def _route_type_to_edge(transit_edge_df, stop_time_df, time_aware=False): 'arrival_time', 'departure_time']], how='left', left_on=['unique_trip_id', 'sequence'], - right_on=['unique_trip_id', 'stop_sequence'] sort=False, + right_on=['unique_trip_id', 'stop_sequence'], sort=False, copy=False) else: