Skip to content

Commit

Permalink
Merge pull request #87 from UDST/feature/time-aware-transit-networks
Browse files Browse the repository at this point in the history
Feature/time aware transit networks
  • Loading branch information
sablanchard authored May 19, 2021
2 parents 531c394 + 35a0d0b commit a1976dc
Show file tree
Hide file tree
Showing 4 changed files with 440 additions and 33 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
language: python

python:
- '2.7'
- '3.5'
- '3.6'
- '3.7'
- '3.8'
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pycodestyle

# testing demo notebook
jupyter
fiona <= 1.8.18 # fixes issue with travis
cartopy # requires conda
pyepsg

Expand Down
141 changes: 116 additions & 25 deletions urbanaccess/gtfs/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import pandas as pd
import time
from datetime import datetime, timedelta
import logging as lg

from urbanaccess.utils import log, df_to_hdf5, hdf5_to_df
Expand All @@ -23,7 +24,9 @@ def create_transit_net(
use_existing_stop_times_int=False,
save_processed_gtfs=False,
save_dir=config.settings.data_folder,
save_filename=None):
save_filename=None,
timerange_pad=None,
time_aware=False):
"""
Create a travel time weight network graph in units of
minutes from GTFS data
Expand Down Expand Up @@ -69,6 +72,17 @@ def create_transit_net(
directory to save the HDF5 file
save_filename : str, optional
name to save the HDF5 file as
timerange_pad: str, optional
string indicating the number of hours minutes seconds to pad after the
end of the time interval specified in 'timerange'. Must follow format
of a 24 hour clock for example: '02:00:00' for a two hour pad or
'02:30:00' for a 2 hour and 30 minute pad.
time_aware: bool, optional
boolean to indicate whether the transit network should include
time information. If True, 'arrival_time' and 'departure_time' columns
from the stop_times table will be included in the transit edge table
where 'departure_time' is the departure time at node_id_from stop and
'arrival_time' is the arrival time at node_id_to stop
Returns
-------
Expand Down Expand Up @@ -97,6 +111,10 @@ def create_transit_net(
raise ValueError('use_existing_stop_times_int must be bool.')
if not isinstance(save_processed_gtfs, bool):
raise ValueError('save_processed_gtfs must be bool.')
if timerange_pad and not isinstance(timerange_pad, str):
raise ValueError('timerange_pad must be string.')
if not isinstance(time_aware, bool):
raise ValueError('time_aware must be bool.')
if overwrite_existing_stop_times_int and use_existing_stop_times_int:
raise ValueError('overwrite_existing_stop_times_int and '
'use_existing_stop_times_int cannot both be True.')
Expand Down Expand Up @@ -144,12 +162,12 @@ def create_transit_net(
selected_interpolated_stop_times_df = _time_selector(
df=gtfsfeeds_dfs.stop_times_int,
starttime=timerange[0],
endtime=timerange[1])
endtime=timerange[1],
timerange_pad=timerange_pad)

final_edge_table = _format_transit_net_edge(
stop_times_df=selected_interpolated_stop_times_df[
['unique_trip_id', 'stop_id', 'unique_stop_id', 'timediff',
'stop_sequence', 'unique_agency_id', 'trip_id']])
stop_times_df=selected_interpolated_stop_times_df,
time_aware=time_aware)

transit_edges = _convert_imp_time_units(
df=final_edge_table, time_col='weight', convert_to='minutes')
Expand Down Expand Up @@ -658,7 +676,7 @@ def _time_difference(stop_times_df):
return stop_times_df


def _time_selector(df, starttime, endtime):
def _time_selector(df, starttime, endtime, timerange_pad=None):
"""
Select stop times that fall within a specified time range
Expand All @@ -669,7 +687,12 @@ def _time_selector(df, starttime, endtime):
starttime : str
24 hour clock formatted time 1
endtime : str
24 hour clock formatted time 2
24 hour clock formatted time 2,
timerange_pad: str, optional
string indicating the number of hours minutes seconds to pad after the
end of the time interval specified in 'timerange'. Must follow format
of a 24 hour clock for example: '02:00:00' for a two hour pad or
'02:30:00' for a 2 hour and 30 minute pad.
Returns
-------
selected_stop_timesdf : pandas.DataFrame
Expand All @@ -695,24 +718,54 @@ def _time_selector(df, starttime, endtime):
end_s = int(str(endtime[6:8]))
endtime_sec = (end_h * 60 * 60) + (end_m * 60) + end_s

# define timepad in seconds to include stops active after specified endtime
if timerange_pad:
# convert timerange_pad 24 hour to seconds
pad_h = int(str(timerange_pad[0:2]))
pad_m = int(str(timerange_pad[3:5]))
pad_s = int(str(timerange_pad[6:8]))
pad_sec = (pad_h * 60 * 60) + (pad_m * 60) + pad_s

# add endtime and timerange_pad to get new endtime and convert to
# str for informative print
dt1 = datetime.strptime(endtime, '%H:%M:%S')
dt2 = datetime.strptime(timerange_pad, '%H:%M:%S')
dt2_delta = timedelta(hours=dt2.hour, minutes=dt2.minute,
seconds=dt2.second)
dt3 = dt1 + dt2_delta
str_t3 = datetime.strftime(dt3, '%H:%M:%S')
log(' Additional stop times active between the specified end time: '
'{} with timerange_pad of: {} (padded end time: {}) '
'will be selected...'.format(endtime, timerange_pad, str_t3))
pad = int(0 if timerange_pad is None else pad_sec)

# create df of stops times that are within the requested range
selected_stop_timesdf = df[(
(starttime_sec < df["departure_time_sec_interpolate"]) & (
df["departure_time_sec_interpolate"] < endtime_sec))]
(starttime_sec <= df["departure_time_sec_interpolate"]) & (
df["departure_time_sec_interpolate"] <= endtime_sec + pad))]

subset_df_count = len(selected_stop_timesdf)
df_count = len(df)
log('Stop times from {} to {} successfully selected {:,} records out of '
'{:,} total records ({:.2f} percent of total). '
'Took {:,.2f} seconds.'.format(
starttime, endtime, subset_df_count, df_count,
(subset_df_count / df_count) * 100,
time.time() - start_time))
if timerange_pad:
log('Stop times from {} to {} (with time_pad end time: {}) '
'successfully selected {:,} records out of {:,} total records '
'({:.2f} percent of total). '
'Took {:,.2f} seconds.'.format(
starttime, endtime, str_t3, subset_df_count, df_count,
(subset_df_count / df_count) * 100,
time.time() - start_time))
else:
log('Stop times from {} to {} successfully selected {:,} records '
'out of {:,} total records ({:.2f} percent of total). '
'Took {:,.2f} seconds.'.format(
starttime, endtime, subset_df_count, df_count,
(subset_df_count / df_count) * 100,
time.time() - start_time))

return selected_stop_timesdf


def _format_transit_net_edge(stop_times_df):
def _format_transit_net_edge(stop_times_df, time_aware=False):
"""
Format transit network data table to match the format required for edges
in Pandana graph networks edges
Expand All @@ -722,6 +775,12 @@ def _format_transit_net_edge(stop_times_df):
stop_times_df : pandas.DataFrame
interpolated stop times with travel time between stops for the subset
time and day
time_aware: bool, optional
boolean to indicate whether the transit network should include
time information. If True, 'arrival_time' and 'departure_time' columns
from the stop_times table will be included in the transit edge table
where 'departure_time' is the departure time at node_id_from stop and
'arrival_time' is the arrival time at node_id_to stop
Returns
-------
Expand All @@ -733,22 +792,52 @@ def _format_transit_net_edge(stop_times_df):
log('Starting transformation process for {:,} '
'total trips...'.format(len(stop_times_df['unique_trip_id'].unique())))

# subset to only columns needed for processing
cols_of_interest = ['unique_trip_id', 'stop_id', 'unique_stop_id',
'timediff', 'stop_sequence', 'unique_agency_id',
'trip_id', 'arrival_time', 'departure_time']
stop_times_df = stop_times_df[cols_of_interest]

# set columns for new df for data needed by Pandana for edges
merged_edge = []

stop_times_df.sort_values(by=['unique_trip_id', 'stop_sequence'],
inplace=True)

if time_aware:
log(' time_aware is True, also adding arrival and departure '
'stop times to edges...')

for trip, tmp_trip_df in stop_times_df.groupby(['unique_trip_id']):
edge_df = pd.DataFrame({
"node_id_from": tmp_trip_df['unique_stop_id'].iloc[:-1].values,
"node_id_to": tmp_trip_df['unique_stop_id'].iloc[1:].values,
"weight": tmp_trip_df['timediff'].iloc[1:].values,
"unique_agency_id": tmp_trip_df[
'unique_agency_id'].iloc[1:].values,
# set unique trip ID without edge order to join other data later
"unique_trip_id": trip
})
# if 'time_aware', also create arrival and departure time cols
if time_aware:
edge_df = pd.DataFrame({
"node_id_from": tmp_trip_df['unique_stop_id'].iloc[:-1].values,
"node_id_to": tmp_trip_df['unique_stop_id'].iloc[1:].values,
"weight": tmp_trip_df['timediff'].iloc[1:].values,
"unique_agency_id":
tmp_trip_df['unique_agency_id'].iloc[1:].values,
# set unique trip ID without edge order to join other data
# later
"unique_trip_id": trip,
# departure_time at node_id_from stop
"departure_time":
tmp_trip_df['departure_time'].iloc[:-1].values,
# arrival_time at node_id_to stop
"arrival_time":
tmp_trip_df['arrival_time'].iloc[1:].values
})
else:
edge_df = pd.DataFrame({
"node_id_from": tmp_trip_df['unique_stop_id'].iloc[:-1].values,
"node_id_to": tmp_trip_df['unique_stop_id'].iloc[1:].values,
"weight": tmp_trip_df['timediff'].iloc[1:].values,
"unique_agency_id":
tmp_trip_df['unique_agency_id'].iloc[1:].values,
# set unique trip ID without edge order to join other data
# later
"unique_trip_id": trip
})

# Set current trip ID to edge ID column adding edge order at
# end of string
Expand All @@ -760,6 +849,8 @@ def _format_transit_net_edge(stop_times_df):
merged_edge_df = pd.concat(merged_edge, ignore_index=True)
merged_edge_df['sequence'] = merged_edge_df['sequence'].astype(
int, copy=False)
# create a unique sequential edge ID
# TODO: consider changing col name to 'edge_id' for clarity
merged_edge_df['id'] = (
merged_edge_df['unique_trip_id'].str.cat(
merged_edge_df['sequence'].astype('str'), sep='_'))
Expand Down
Loading

0 comments on commit a1976dc

Please sign in to comment.