-
Notifications
You must be signed in to change notification settings - Fork 27
/
utils.py
400 lines (317 loc) · 14 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
from __future__ import print_function
import os
import numpy as np
import orca
import pandas as pd
from urbansim.models import RegressionModel, SegmentedRegressionModel, \
MNLDiscreteChoiceModel, SegmentedMNLDiscreteChoiceModel, \
GrowthRateTransition
from urbansim.developer import sqftproforma, developer
from urbansim.utils import misc
def get_run_filename():
    """
    Build the path of the HDF5 output file for the current run

    Returns
    -------
    string
        ``run<N>.h5`` inside ``misc.runs_dir()``, where ``<N>`` is the
        value returned by ``misc.get_run_number()``
    """
    runs_dir = misc.runs_dir()
    run_file = "run%d.h5" % misc.get_run_number()
    return os.path.join(runs_dir, run_file)
def change_store(store_name):
    """
    Point the "store" injectable at a different HDF5 file

    Parameters
    ----------
    store_name : string
        File name of the store, resolved relative to ``misc.data_dir()``.
        The store is opened read-only.
    """
    store_path = os.path.join(misc.data_dir(), store_name)
    store = pd.HDFStore(store_path, mode="r")
    orca.add_injectable("store", store)
def change_scenario(scenario):
    """
    Switch the active scenario by resetting the "scenario" injectable

    Parameters
    ----------
    scenario : string
        Name of the scenario; must be a key of the "scenario_inputs"
        injectable.

    Raises
    ------
    AssertionError
        If `scenario` is not present in "scenario_inputs".
    """
    if scenario not in orca.get_injectable("scenario_inputs"):
        # Raised explicitly instead of via ``assert`` so that the check is
        # not stripped when Python runs with -O; the exception type is kept
        # for callers that already catch AssertionError.
        raise AssertionError("Invalid scenario name")
    print("Changing scenario to '%s'" % scenario)
    orca.add_injectable("scenario", scenario)
def conditional_upzone(scenario, attr_name, upzone_name):
    """
    Return a zoning attribute, raised by a scenario's upzoning if any

    Parameters
    ----------
    scenario : string
        Name of the active scenario.  For "baseline" the baseline column
        is returned unchanged.
    attr_name : string
        Column to read from the baseline zoning table.
    upzone_name : string
        Column to read from the scenario's zoning table; its NaN entries
        are dropped before comparison.

    Returns
    -------
    Series
        The baseline column for "baseline"; otherwise the row-wise maximum
        (ignoring NaN) of the baseline and upzone columns.
    """
    inputs = orca.get_injectable("scenario_inputs")
    baseline_tbl = orca.get_table(inputs["baseline"]["zoning_table_name"])
    baseline_vals = baseline_tbl[attr_name]
    if scenario == "baseline":
        return baseline_vals

    scenario_tbl = orca.get_table(inputs[scenario]["zoning_table_name"])
    upzone_vals = scenario_tbl[upzone_name].dropna()
    # align both columns side by side, then keep the larger value per row
    side_by_side = pd.concat([baseline_vals, upzone_vals], axis=1)
    return side_by_side.max(skipna=True, axis=1)
def enable_logging():
    """
    Set urbansim's log level to INFO and enable its stream logging
    """
    # imported lazily, as in the rest of this function's history, so that
    # merely importing this module does not pull in the logging helpers
    from urbansim.utils import logutil as log
    info_level = log.logging.INFO
    log.set_log_level(info_level)
    log.log_to_stream()
def deal_with_nas(df):
    """
    Verify that a dataframe contains no missing or infinite values

    Infinite values are first converted to NaN, then every column is
    checked; each offending column is reported on stdout before failing.

    Parameters
    ----------
    df : DataFrame
        The frame to check.

    Returns
    -------
    DataFrame
        A copy of `df` with +/-inf replaced by NaN (which, on success,
        means it contains neither).

    Raises
    ------
    AssertionError
        If any column contains NaN or infinite values.
    """
    n_rows = len(df)
    df = df.replace([np.inf, -np.inf], np.nan)

    found_nas = False
    for col in df.columns:
        # count() only counts non-null entries
        n_valid = df[col].count()
        if n_valid != n_rows:
            found_nas = True
            print("Found %d nas or inf (out of %d) in column %s" %
                  (n_rows - n_valid, n_rows, col))

    if found_nas:
        # Raised explicitly instead of via ``assert`` so that the check is
        # not stripped when Python runs with -O; the exception type is kept
        # for callers that already catch AssertionError.
        raise AssertionError("NAs were found in dataframe, please fix")
    return df
def fill_nas_from_config(dfname, df):
    """
    Fill missing values in a dataframe as directed by "fillna_config"

    The "fillna_config" injectable is indexed by `dfname`; the entry maps
    each column name to a ``(fill type, dtype)`` pair.  Supported fill
    types are "zero", "mode" (most frequent non-null value) and "median".

    Parameters
    ----------
    dfname : string
        Key of this dataframe's entry in "fillna_config".
    df : DataFrame
        The frame to fill.  Configured columns are reassigned on this
        object.

    Returns
    -------
    DataFrame
        The same `df`, with each configured column filled and cast to its
        configured dtype.

    Raises
    ------
    AssertionError
        If a configured fill type is not one of the supported values.
    """
    df_cnt = len(df)
    fillna_config = orca.get_injectable("fillna_config")
    fillna_config_df = fillna_config[dfname]

    for fname in fillna_config_df:
        filltyp, dtyp = fillna_config_df[fname]
        # count() only counts non-null entries, so the difference is the
        # number of values about to be filled
        fill_cnt = df_cnt - df[fname].count()

        if filltyp == "zero":
            val = 0
        elif filltyp == "mode":
            val = df[fname].dropna().value_counts().idxmax()
        elif filltyp == "median":
            # quantile() defaults to q=0.5, i.e. the median
            val = df[fname].dropna().quantile()
        else:
            # Raised explicitly instead of via ``assert 0``: under -O the
            # assert would vanish and `val` would be undefined or stale.
            raise AssertionError("Fill type not found!")

        print("Filling column {} with value {} ({} values)"
              .format(fname, val, fill_cnt))
        df[fname] = df[fname].fillna(val).astype(dtyp)
    return df
def to_frame(tables, cfg, additional_columns=None):
    """
    Build the dataframe a model needs from one or more tables

    Parameters
    ----------
    tables : list of DataFrame Wrappers
        Tables to draw columns from; ``None`` entries are ignored.  When
        more than one table remains they are merged with
        ``orca.merge_tables`` using the first table as the target.
    cfg : string
        Path of the yaml model configuration; the model loaded from it
        determines which columns are used.
    additional_columns : iterable of strings, optional
        Extra column names to include beyond those the model uses.

    Returns
    -------
    DataFrame
        The requested columns, validated by ``deal_with_nas`` (which raises
        if NaN or infinite values are present).
    """
    # None instead of a mutable [] default, so no list object is shared
    # between calls
    extra_columns = [] if additional_columns is None \
        else list(additional_columns)

    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + extra_columns

    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables, columns=columns)
    else:
        df = tables[0].to_frame(columns)

    df = deal_with_nas(df)
    return df
def yaml_to_class(cfg):
    """
    Look up the urbansim model class named by a yaml configuration file

    Parameters
    ----------
    cfg : string
        Path of the yaml file; its top-level "model_type" entry selects
        the class.

    Returns
    -------
    class
        One of RegressionModel, SegmentedRegressionModel,
        MNLDiscreteChoiceModel or SegmentedMNLDiscreteChoiceModel.

    Raises
    ------
    KeyError
        If "model_type" is missing or is not a recognized model type.
    """
    import yaml

    # The context manager closes the file handle; safe_load parses plain
    # yaml without constructing arbitrary Python objects and, unlike a bare
    # yaml.load(stream), does not require an explicit Loader argument.
    with open(cfg) as f:
        model_type = yaml.safe_load(f)["model_type"]

    return {
        "regression": RegressionModel,
        "segmented_regression": SegmentedRegressionModel,
        "discretechoice": MNLDiscreteChoiceModel,
        "segmented_discretechoice": SegmentedMNLDiscreteChoiceModel
    }[model_type]
def hedonic_estimate(cfg, tbl, nodes):
    """
    Estimate the hedonic model described by a yaml config

    Parameters
    ----------
    cfg : string
        Name of the yaml config file, resolved with ``misc.config``.
    tbl : DataFrame Wrapper
        The table holding the observations to fit on.
    nodes : DataFrame Wrapper
        Table merged onto `tbl` to supply additional columns (may be
        ``None``, in which case only `tbl` is used).

    Returns
    -------
    Whatever the model class's ``fit_from_cfg`` returns
    """
    cfg_path = misc.config(cfg)
    estimation_df = to_frame([tbl, nodes], cfg_path)
    model_cls = yaml_to_class(cfg_path)
    return model_cls.fit_from_cfg(estimation_df, cfg_path)
def hedonic_simulate(cfg, tbl, nodes, out_fname):
    """
    Run the hedonic model and write its predictions back to a table

    Parameters
    ----------
    cfg : string
        Name of the yaml config file, resolved with ``misc.config``.
    tbl : DataFrame Wrapper
        The table to predict for; also receives the output column.
    nodes : DataFrame Wrapper
        Table merged onto `tbl` to supply additional columns (may be
        ``None``, in which case only `tbl` is used).
    out_fname : string
        Column of `tbl` that the predictions are written to.

    Returns
    -------
    Updates `tbl` in place (returns nothing)
    """
    cfg_path = misc.config(cfg)
    simulation_df = to_frame([tbl, nodes], cfg_path)
    model_cls = yaml_to_class(cfg_path)
    # predict_from_cfg returns a pair; only the predicted series is used
    predicted, _ = model_cls.predict_from_cfg(simulation_df, cfg_path)
    tbl.update_col_from_series(out_fname, predicted, cast=True)
def lcm_estimate(cfg, choosers, chosen_fname, buildings, nodes):
    """
    Estimate the location choice model described by a yaml config

    Parameters
    ----------
    cfg : string
        Name of the yaml config file, resolved with ``misc.config``.
    choosers : DataFrame Wrapper
        The agents whose observed choices are used for estimation.
    chosen_fname : string
        Column of `choosers` holding the chosen alternative; it is added
        to the columns pulled from `choosers`.
    buildings : DataFrame Wrapper
        The alternatives being chosen among.
    nodes : DataFrame Wrapper
        Table merged onto `buildings` to supply additional columns.

    Returns
    -------
    Whatever the model class's ``fit_from_cfg`` returns
    """
    cfg_path = misc.config(cfg)
    choosers_df = to_frame([choosers], cfg_path,
                           additional_columns=[chosen_fname])
    alternatives_df = to_frame([buildings, nodes], cfg_path)
    model_cls = yaml_to_class(cfg_path)
    return model_cls.fit_from_cfg(choosers_df, chosen_fname,
                                  alternatives_df, cfg_path)
def lcm_simulate(cfg, choosers, buildings, nodes, out_fname,
                 supply_fname, vacant_fname):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model.
    choosers : DataFrame Wrapper
        The agents doing the choosing.  Agents whose `out_fname` value is
        -1 are treated as movers; results are written back with
        ``choosers.update_col_from_series``.
    buildings : DataFrame Wrapper
        The buildings which the choosers are locating in and which
        have a supply.
    nodes : DataFrame Wrapper
        A land use dataset to give neighborhood info around the buildings -
        will be joined to the buildings.
    out_fname : string
        The column name to write the simulated location to.
    supply_fname : string
        The string in the buildings table that indicates the amount of
        available units there are for choosers, vacant or not.
    vacant_fname : string
        The string in the buildings table that indicates the amount of vacant
        units there will be for choosers.

    Returns
    -------
    Writes the chosen locations back to `choosers` (returns nothing)
    """
    cfg = misc.config(cfg)

    choosers_df = to_frame([choosers], cfg, additional_columns=[out_fname])
    locations_df = to_frame([buildings, nodes], cfg,
                            [supply_fname, vacant_fname])

    available_units = buildings[supply_fname]
    vacant_units = buildings[vacant_fname]

    print("There are {} total available units\n"
          " and {} total choosers\n"
          " but there are {} overfull buildings"
          .format(available_units.sum(), len(choosers),
                  len(vacant_units[vacant_units < 0])))

    vacant_units = vacant_units[vacant_units > 0]
    # The sum may come back as a float; cast once so it can be used both
    # for reporting and as the row count passed to head() below.
    num_vacant = int(vacant_units.sum())

    # one row per vacant unit: repeat each building's row by its vacancy
    units = locations_df.loc[np.repeat(vacant_units.index.values,
                             vacant_units.values.astype('int'))].reset_index()

    print(" for a total of {} temporarily empty units\n"
          " in {} buildings total in the region"
          .format(num_vacant, len(vacant_units)))

    movers = choosers_df[choosers_df[out_fname] == -1]

    if len(movers) > num_vacant:
        print("WARNING: Not enough locations for movers"
              " reducing locations to size of movers for performance gain")
        movers = movers.head(num_vacant)

    new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, units, cfg)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    new_buildings = pd.Series(units.loc[new_units.values][out_fname].values,
                              index=new_units.index)

    choosers.update_col_from_series(out_fname, new_buildings, cast=True)
    _print_number_unplaced(choosers, out_fname)

    # re-read the column to report vacancy after the update
    vacant_units = buildings[vacant_fname]
    print(" and there are now {} empty units"
          " and {} overfull buildings"
          .format(vacant_units.sum(), len(vacant_units[vacant_units < 0])))
def simple_relocation(choosers, relocation_rate, fieldname):
    """
    Randomly mark a share of agents as unplaced so they relocate

    Parameters
    ----------
    choosers : DataFrame Wrapper
        The agents to sample from; updated in place.
    relocation_rate : float
        Fraction of agents to relocate; the number selected is
        ``int(relocation_rate * len(choosers))``, drawn without
        replacement.
    fieldname : string
        Location column of `choosers`; set to -1 for the selected agents.

    Returns
    -------
    Updates `choosers` in place (returns nothing)
    """
    print("Total agents: %d" % len(choosers))
    _print_number_unplaced(choosers, fieldname)

    print("Assinging for relocation...")
    num_relocating = int(relocation_rate * len(choosers))
    relocating_ids = np.random.choice(choosers.index,
                                      size=num_relocating,
                                      replace=False)
    unplaced = pd.Series(-1, index=relocating_ids)
    choosers.update_col_from_series(fieldname, unplaced, cast=True)

    _print_number_unplaced(choosers, fieldname)
def simple_transition(tbl, rate, location_fname):
    """
    Apply a growth-rate transition to a table of agents

    Parameters
    ----------
    tbl : DataFrame Wrapper
        The agents table; only its local columns are carried through, and
        the result is re-registered with orca under the same name.
    rate : float
        Growth rate handed to ``GrowthRateTransition``.
    location_fname : string
        Location column; set to -1 for the agents the transition added.

    Returns
    -------
    Replaces the orca table named ``tbl.name`` (returns nothing)
    """
    grower = GrowthRateTransition(rate)
    agents = tbl.to_frame(tbl.local_columns)

    print("%d agents before transition" % len(agents.index))
    # only the resulting frame and the index of added rows are needed
    agents, added, _copied, _removed = grower.transition(agents, None)
    print("%d agents after transition" % len(agents.index))

    agents.loc[added, location_fname] = -1
    orca.add_table(tbl.name, agents)
def _print_number_unplaced(df, fieldname):
    """
    Print how many rows have -1 in the given column

    Parameters
    ----------
    df : DataFrame or DataFrame Wrapper
        Anything that yields a Series when indexed by `fieldname`.
    fieldname : string
        The location column to inspect; -1 marks an unplaced agent.
    """
    locations = df[fieldname]
    num_unplaced = (locations == -1).sum()
    print("Total currently unplaced: %d" % num_unplaced)
def run_feasibility(parcels, parcel_price_callback,
                    parcel_use_allowed_callback, residential_to_yearly=True):
    """
    Run the square-foot pro forma over every parcel

    Parameters
    ----------
    parcels : DataFrame Wrapper
        Wrapper around the parcel data
    parcel_price_callback : function
        Called once per pro forma use; must return a series indexed by
        parcel_id whose values are the yearly rent for that use
    parcel_use_allowed_callback : function
        Called once per pro forma form; must return a boolean series
        indexed by parcel_id saying whether the form is allowed on each
        parcel
    residential_to_yearly : boolean (default true)
        When true, the residential price is multiplied by the pro forma's
        cap rate to turn a total sales price per sqft into a rent per sqft

    Returns
    -------
    Registers the result as the orca table "feasibility" (returns nothing)
    """
    proforma = sqftproforma.SqFtProForma()
    uses = proforma.config.uses

    parcel_df = parcels.to_frame()

    # one price column per use
    for use in uses:
        parcel_df[use] = parcel_price_callback(use)

    # sales price -> yearly rent for residential
    if residential_to_yearly:
        parcel_df["residential"] *= proforma.config.cap_rate

    print("Describe of the yearly rent by use")
    print(parcel_df[uses].describe())

    results_by_form = {}
    for form in proforma.config.forms:
        print("Computing feasibility for form %s" % form)
        # restrict the lookup to parcels where this form is allowed
        allowed_parcels = parcel_df[parcel_use_allowed_callback(form)]
        results_by_form[form] = proforma.lookup(form, allowed_parcels)

    # side-by-side results, with the form name as the outer column level
    far_predictions = pd.concat(results_by_form.values(),
                                keys=results_by_form.keys(), axis=1)

    orca.add_table("feasibility", far_predictions)
def run_developer(forms, agents, buildings, supply_fname, parcel_size,
                  ave_unit_size, total_units, feasibility, year=None,
                  target_vacancy=.1, form_to_btype_callback=None,
                  add_more_columns_callback=None, max_parcel_size=200000,
                  residential=True, bldg_sqft_per_job=400.0):
    """
    Pick feasible developments and append them to the buildings table

    Parameters
    ----------
    forms : string or list of strings
        Forwarded to dev.pick
    agents : DataFrame Wrapper
        Its length is the current demand for units/floorspace
    buildings : DataFrame Wrapper
        Its `supply_fname` column, summed, is the current supply of
        units/floorspace
    supply_fname : string
        Name of the buildings column that holds the supply of
        units/floorspace
    parcel_size : Series
        Forwarded to dev.pick
    ave_unit_size : Series
        Forwarded to dev.pick - average residential unit size
    total_units : Series
        Forwarded to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The table called 'feasibility', as produced by the feasibility step
    year : int
        Simulation year - stored as 'year_built' on the new buildings when
        given
    target_vacancy : float
        Target vacancy rate - drives how many units to build
    form_to_btype_callback : function
        Maps each pro forma 'form' to a 'building_type_id' of the larger
        model
    add_more_columns_callback : function
        Takes the new buildings dataframe and returns a dataframe - hook
        for custom modifications to the buildings being added
    max_parcel_size : float
        Forwarded to dev.pick - max parcel size to consider
    residential : boolean
        Forwarded to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Forwarded to dev.pick - multiplier between floor spaces and job
        spaces for this form (does not vary by parcel as ave_unit_size
        does)

    Returns
    -------
    Re-registers the "feasibility" and "buildings" orca tables (returns
    nothing)
    """
    dev = developer.Developer(feasibility.to_frame())

    num_agents = len(agents)
    current_supply = buildings[supply_fname].sum()
    target_units = dev.compute_units_to_build(num_agents,
                                              current_supply,
                                              target_vacancy)

    print("{:,} feasible buildings before running developer"
          .format(len(dev.feasibility)))

    pick_options = dict(max_parcel_size=max_parcel_size,
                        drop_after_build=True,
                        residential=residential,
                        bldg_sqft_per_job=bldg_sqft_per_job)
    new_buildings = dev.pick(forms, target_units, parcel_size,
                             ave_unit_size, total_units, **pick_options)

    # store the developer's feasibility frame back, whether or not
    # anything was picked
    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # form gets set only if forms is a list
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        building_types = new_buildings["form"].apply(form_to_btype_callback)
        new_buildings["building_type_id"] = building_types

    # round the number of stories up to a whole number
    new_buildings["stories"] = new_buildings["stories"].apply(np.ceil)

    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    num_new = len(new_buildings)
    new_supply = int(new_buildings[supply_fname].sum())
    print("Adding {:,} buildings with {:,} {}"
          .format(num_new, new_supply, supply_fname))

    print("{:,} feasible buildings after running developer"
          .format(len(dev.feasibility)))

    # keep only the buildings table's own columns on both sides of the merge
    existing_buildings = buildings.to_frame(buildings.local_columns)
    added_buildings = new_buildings[buildings.local_columns]
    all_buildings = dev.merge(existing_buildings, added_buildings)

    orca.add_table("buildings", all_buildings)