# optimal_station_placement.py
# %% set working directory if needed
import os
os.chdir('/Users/MargheritaP/Documents/GitHub/bike-stations')
# %% import packages
import random
from itertools import combinations

import numpy as np
import pandas as pd
import igraph as ig

from bike_functions import *
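# bike_functions is assumed, from its usage below, to provide the helpers
# continuous_to_rgb, filter_subpaths, filter_stations, paths_to_edges,
# any_subpath, calculate_travel_time, calculate_travel_time_bike, and
# get_total_volume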
# %% load trips data as potential commuter data
anaheim_trips = "Data/anaheim_trips.tntp"
with open(anaheim_trips, 'r') as f:
    data = f.readlines()
table = []
for line in data:
    if 'Origin' in line:
        origin = line.strip().split()[1]
        # Add missing destination for first origin
        #table.append([origin, '1', '0'])
    else:
        for destination in line.strip().split(';')[:-1]:
            destination_number, weight = destination.split(':')
            table.append([origin, destination_number.strip(), weight.strip()])
        # Add missing destination for last origin
        #table.append([origin, '37', '0'])
trips = pd.DataFrame(table, columns=['Origin', 'Destination', 'Weight'])
print(trips.shape)
print(trips.head())
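# the parser above assumes the TNTP trips file looks roughly like:
#   Origin  1
#       2 :    123.4;    3 :     56.7; ...
# i.e. an "Origin" header line followed by "destination : weight ;" entries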
# %% read ANAHEIM road data as dataframes
# Zones: 38 __ Nodes: 416 __ Links: 914 __ Trips: 104,694.40
# Time: minutes __ Distance: feet __ Speed: feet per minute
anaheim = "Data/anaheim.tntp"
anaheim = pd.read_csv(anaheim, skiprows=8, sep='\t').drop(["~", ";"], axis=1)
# flow data
flow = "Data/anaheim_flow.tntp"
flow = pd.read_csv(flow, sep=r'\t|\s', engine='python')
anaheim = anaheim.merge(flow[["Volume"]], left_index=True, right_index=True)
print(anaheim.head())
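# note: merging on the index assumes the net and flow files list the links in
# the same order; if they don't, the merge should join on the node columns instead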
# %% read SIOUX FALLS road data as dataframes
siouxfalls = "Data/SiouxFalls_net.tntp"
siouxfalls = pd.read_csv(siouxfalls, skiprows=8, sep='\t').drop(["~", ";"], axis=1)
# flow data
flow = "Data/SiouxFalls_flow.tntp"
flow = pd.read_csv(flow, sep=r'\t|\s', engine='python')
siouxfalls = siouxfalls.merge(flow[["Volume"]], left_index=True, right_index=True)
print(siouxfalls.head())
#%% ANAHEIM
# %% convert the ANAHEIM dataframe data to a graph.
g_a = ig.Graph.TupleList(anaheim.itertuples(index=False), directed=True, weights=False, edge_attrs=["capacity","length","free_flow_time","b","power","speed","toll","link_type", "Volume"])
# add colour as edge attribute - on the basis of volume
g_a.es['color'] = continuous_to_rgb(np.log(anaheim.Volume +1)).tolist()
# give zone-nodes a different colour and shape to make them visible
# (note: this assumes the zone nodes sit at vertex indices 1-38; TupleList
# orders vertices by first appearance in the edge list, so this is worth checking)
zone_indices = list(range(1, 39))
colors = ["red" if i in zone_indices else "grey" for i in range(g_a.vcount())]
shapes = ["square" if i in zone_indices else "circle" for i in range(g_a.vcount())]
g_a.vs["color"] = colors
g_a.vs["shape"] = shapes
# add degree as vertex attribute
degree = g_a.degree()
g_a.vs["degree"] = degree
# calculate vertex sizes based on degree
sizes_a = [d * 1.4 for d in degree]
# adjust layout - for full list of options: https://igraph.org/python/tutorial/0.9.6/visualisation.html#graph-layouts
layout_a = g_a.layout("kamada_kawai")  # most suitable: kamada_kawai __ DrL __ fruchterman_reingold __ davidson_harel
# get graph information and plot
g_a.summary()  # 416 vertices and 914 edges
ig.plot(g_a, vertex_size=sizes_a, layout=layout_a, edge_arrow_size=0.3, vertex_frame_width=0.5)
# %% convert the SIOUX FALLS dataframe data to a graph.
g_s = ig.Graph.TupleList(siouxfalls.itertuples(index=False), directed=True, weights=False, edge_attrs=["capacity","length","free_flow_time","b","power","speed","toll","link_type", "Volume"])
# add colour as edge attribute - on the basis of volume
g_s.es['color'] = continuous_to_rgb(np.log(siouxfalls.Volume +1)).tolist()
# give zone-nodes a different colour and shape to make them visible
zone_indices_s = []  # zone colouring not applicable for Sioux Falls
colors = ["red" if i in zone_indices_s else "grey" for i in range(g_s.vcount())]
shapes = ["square" if i in zone_indices_s else "circle" for i in range(g_s.vcount())]
g_s.vs["color"] = colors
g_s.vs["shape"] = shapes
# add degree as vertex attribute
degree_s = g_s.degree()
g_s.vs["degree"] = degree_s
# calculate vertex sizes based on degree
sizes_s = [d *3 for d in degree_s]
# adjust layout - for full list of options: https://igraph.org/python/tutorial/0.9.6/visualisation.html#graph-layouts
layout_s = g_s.layout("kamada_kawai")
# get graph information and plot
g_s.summary()  # 24 vertices and 76 edges
ig.plot(g_s, vertex_size=sizes_s, layout=layout_s, edge_arrow_size=0.3, vertex_frame_width=0.5)
# %%
# find all shortest paths between all pairs of zone nodes
shortest_paths = []
for source in zone_indices:
    for target in zone_indices:
        if source != target:
            paths = g_a.get_all_shortest_paths(source, to=target, weights='free_flow_time')
            shortest_paths.extend(paths)
# shortest_paths is a list of vertex-index lists
trip_paths = pd.DataFrame({
    'path': shortest_paths,
    'Origin': [lst[0] for lst in shortest_paths],
    'Destination': [lst[-1] for lst in shortest_paths],
    'path_length': [len(lst) for lst in shortest_paths]
})
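# flag paths that pass through another zone node (indices 1-38) en route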
contains_zone = [any(1 <= node <= 38 for node in path[1:-1]) for path in trip_paths['path']]
trip_paths['contains_zone'] = contains_zone
print(trip_paths[0:10])
print(trip_paths.shape)
#count the number of duplicates
num_duplicates = trip_paths.duplicated(subset=['Origin', 'Destination']).sum()
print(f"There are {num_duplicates} duplicates in the DataFrame, as multiple candidates for the shortest path exist between many nodes.")
grouped = trip_paths.groupby(['Origin', 'Destination']).size().reset_index(name='count')
grouped_filtered = grouped[grouped['count'] > 1]
print(grouped_filtered)
## for simplicity, randomly select one of the candidate shortest paths between each pair of zone nodes
## we could later weight the choice by the traffic volume along each candidate path
trip_paths_simple = trip_paths.groupby(['Origin', 'Destination']).apply(pd.DataFrame.sample, n=1).reset_index(drop=True)
print(trip_paths_simple.head())
print(trip_paths_simple.shape)  # as expected, we are back to 1406 rows (38*37)
# check that there are no duplicates left
num_duplicates = trip_paths_simple.duplicated(subset=['Origin', 'Destination']).sum()
assert num_duplicates == 0
# %% Generate fake station data (as below, but now for the whole graph) - I wasn't able to run this, as it takes too long:
bike_stations = random.sample(g_a.vs.indices, 2)
station_paths = g_a.get_all_simple_paths(bike_stations[0], bike_stations[1])
station_paths.extend(g_a.get_all_simple_paths(bike_stations[1], bike_stations[0]))
is_bike_edge = paths_to_edges(station_paths, anaheim)
# visualize and plot
g_a.es['color'] = ['grey' if edge else 'pink' for edge in is_bike_edge]
ig.plot(g_a, vertex_size=sizes_a, layout=layout_a, edge_arrow_size=0.3, vertex_frame_width=0.5)
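# paths_to_edges (from bike_functions) presumably returns one boolean per edge,
# True where that edge lies on at least one of the station paths; a minimal
# sketch, assuming the first two columns of the edge table are source and target:
#
#   def paths_to_edges(paths, edge_df):
#       on_path = {(p[i], p[i + 1]) for p in paths for i in range(len(p) - 1)}
#       src, tgt = edge_df.columns[:2]
#       return [(s, t) in on_path for s, t in zip(edge_df[src], edge_df[tgt])]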
# %% optional: generate a subgraph to work with fewer nodes
sg_a = g_a.subgraph(g_a.neighborhood(31, order=5))
sg_df = sg_a.get_edge_dataframe()
# here's what the subgraph looks like. The edges are coloured by volume of traffic.
sizes_sg = [d * 3 for d in sg_a.degree()]
ig.plot(sg_a, edge_arrow_size=0.6, vertex_size=sizes_sg)
# %% generate some fake commuter data:
paths = []
time = []
volume = []
for v in sg_a.vs.indices:
    path = random.sample(filter_subpaths(sg_a.get_all_shortest_paths(v)), 1)[0]
    if len(path) > 1:
        paths.append(path)
        time.append(calculate_travel_time(path, sg_df))
        volume.append(get_total_volume(path, sg_df))
riders = pd.DataFrame({"path": paths, "travel_time": time, "volume": volume})
print(riders.shape)
print(riders.head())
# %% alternative: real commuter data instead of fake commuter data
real_riders = pd.read_csv("real commutes.csv")
real_riders_merged = pd.merge(real_riders, trip_paths_simple, on=['Origin', 'Destination'])
real_riders_merged["travel_time"] = real_riders_merged['path'].apply(calculate_travel_time, args=(sg_df,))
# because we are using the subgraph, some paths get travel time == 0 (they are not in the subgraph);
# we filter them out. When using the whole graph, the data is fully computed.
riders = real_riders_merged[real_riders_merged["travel_time"] != 0].copy()
# %% Generate fake station data:
bike_stations = random.sample(sg_a.vs.indices, 2)
station_paths = sg_a.get_all_simple_paths(bike_stations[0], bike_stations[1])
station_paths.extend(sg_a.get_all_simple_paths(bike_stations[1], bike_stations[0]))
is_bike_edge = paths_to_edges(station_paths, sg_df)
# visualize and plot
sg_a.es['color'] = ['red' if edge else 'black' for edge in is_bike_edge]
ig.plot(sg_a)
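# any_subpath (from bike_functions) presumably tests whether any station path
# appears as a contiguous stretch of the rider's path; a minimal sketch under
# that assumption:
#
#   def any_subpath(subpaths, path):
#       joined = ',' + ','.join(map(str, path)) + ','
#       return any(',' + ','.join(map(str, p)) + ',' in joined for p in subpaths)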
# %% calculate the new travel time given these stations
travel_time_bike = []
for i, path in enumerate(riders.path):
    if any_subpath(station_paths, path):
        travel_time_bike.append(calculate_travel_time_bike(path, sg_df, is_bike_edge))
    else:
        travel_time_bike.append(riders.iloc[i]['travel_time'])
# assign as a plain list: a pd.Series would align on the (filtered) index and produce NaNs
riders['travel_time_bike'] = travel_time_bike
print('travel time without stations: ', riders.travel_time.sum())
print('travel time with two randomly placed stations: ', riders.travel_time_bike.sum())
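# net effect of the two random stations (simple difference of the two totals above):
print('time saved: ', riders.travel_time.sum() - riders.travel_time_bike.sum())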
# %% OK, that was one random placement; now iterate over all candidate station pairs to compare time reductions
stations = filter_stations(list(combinations(sg_a.vs.indices, 2)))
station_time = []
for station in stations:
    station_paths = sg_a.get_all_simple_paths(station[0], station[1])
    station_paths.extend(sg_a.get_all_simple_paths(station[1], station[0]))
    is_bike_edge = paths_to_edges(station_paths, sg_df)
    travel_time_bike = []
    for i, path in enumerate(riders.path):
        if any_subpath(station_paths, path):
            travel_time_bike.append(calculate_travel_time_bike(path, sg_df, is_bike_edge))
        else:
            travel_time_bike.append(riders.iloc[i]['travel_time'])
    riders['travel_time_bike'] = travel_time_bike
    station_time.append(riders.travel_time_bike.sum())
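# note: get_all_simple_paths enumerates every simple path between a pair, which
# grows exponentially with graph size - this brute-force sweep is the reason the
# full-graph version above was too slow to run, and only the subgraph is used here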
# report of stations and their times:
station_df = pd.DataFrame({"station":stations, "station_time":station_time})
# this is a visualization of travel time iterating over all possible stations.
# we see most bike station placements result in little time reduction, while there are a few outliers
# that reduce time a lot
station_df.station_time.hist()
# top 10 station placements and their times
station_df.sort_values('station_time').head(10)
# %% visualization of the best station placement
station = station_df.sort_values('station_time').head(1).station.values[0]
station_paths = sg_a.get_all_simple_paths(station[0], station[1])
station_paths.extend(sg_a.get_all_simple_paths(station[1], station[0]))
is_bike_edge = paths_to_edges(station_paths, sg_df)
# visualize and plot
sg_a.es['color'] = ['red' if edge else 'black' for edge in is_bike_edge]
sg_a.vs['color'] = ['red' if node in station else 'black' for node in sg_a.vs.indices]
print('best bike station placement:')
ig.plot(sg_a)
# %%
############################################
# Draft: generating rider data that reflects the volume of each edge.
# Kept commented out - time and volume are never filled in, so the final
# DataFrame construction would fail as written.
# paths = []
# time = []
# volume = []
#
# def has_nan(lst):
#     return np.isnan(lst).any()
#
# for v in sg_a.vs.indices:
#     d_paths = dict()
#     possible_paths = sg_a.get_all_shortest_paths(v)
#     # weight each candidate path by its share of the total traffic volume
#     for i, path in enumerate(possible_paths):
#         d_paths[i] = round(get_total_volume(path, sg_df), 2)
#     total = sum(d_paths.values())
#     d_paths = {k: vol / total for k, vol in d_paths.items()}
#     if not has_nan(list(d_paths.values())):
#         sampled_indices = np.random.choice(list(d_paths.keys()), size=3, p=list(d_paths.values()))
#         sampled_paths = [possible_paths[i] for i in sampled_indices]
#         paths.extend(sampled_paths)
#
# riders = pd.DataFrame({"path": paths, "travel_time": time, "volume": volume})