Skip to content

Commit f252006

Browse files
committed
CDAP estimation with joint component
1 parent 2f9afa0 commit f252006

File tree

2 files changed

+136
-15
lines changed

2 files changed

+136
-15
lines changed

activitysim/abm/models/cdap.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,11 @@ def cdap_simulate(
181181
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
182182
spec = cdap.get_cached_spec(state, hhsize)
183183
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
184+
if add_joint_tour_utility:
185+
joint_spec = cdap.get_cached_joint_spec(hhsize)
186+
estimator.write_table(
187+
joint_spec, "joint_spec_%s" % hhsize, append=False
188+
)
184189

185190
logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))
186191

@@ -215,6 +220,11 @@ def cdap_simulate(
215220
if estimator:
216221
estimator.write_choices(choices)
217222
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
223+
if add_joint_tour_utility:
224+
hh_joint.index.name = "household_id"
225+
hh_joint = estimator.get_survey_values(
226+
hh_joint, "households", "has_joint_tour"
227+
)
218228
estimator.write_override_choices(choices)
219229
estimator.end_estimation()
220230

activitysim/estimation/larch/cdap.py

Lines changed: 126 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121

2222
_logger = logging.getLogger(logger_name)
2323

24+
MAX_HHSIZE = 5
2425

25-
def generate_alternatives(n_persons):
26+
27+
def generate_alternatives(n_persons, add_joint=False):
2628
"""
2729
Generate a dictionary of CDAP alternatives.
2830
@@ -41,8 +43,14 @@ def generate_alternatives(n_persons):
4143
alt_names = list(
4244
"".join(i) for i in itertools.product(basic_patterns, repeat=n_persons)
4345
)
46+
if add_joint:
47+
pattern = r"[MN]"
48+
joint_alts = [
49+
alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2
50+
]
51+
alt_names = alt_names + joint_alts
4452
alt_codes = np.arange(1, len(alt_names) + 1)
45-
return Dict(zip(alt_names, alt_codes))
53+
return dict(zip(alt_names, alt_codes))
4654

4755

4856
def apply_replacements(expression, prefix, tokens):
@@ -69,7 +77,9 @@ def apply_replacements(expression, prefix, tokens):
6977
return expression
7078

7179

72-
def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
80+
def cdap_base_utility_by_person(
81+
model, n_persons, spec, alts=None, value_tokens=(), add_joint=False
82+
):
7383
"""
7484
Build the base utility by person for each pattern.
7585
@@ -102,7 +112,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=
102112
model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
103113
else:
104114
if alts is None:
105-
alts = generate_alternatives(n_persons)
115+
alts = generate_alternatives(n_persons, add_joint)
106116
person_numbers = range(1, n_persons + 1)
107117
for pnum in person_numbers:
108118
for i in spec.index:
@@ -222,13 +232,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie
222232
model.utility_co[anum] += linear_component
223233

224234

225-
def cdap_split_data(households, values):
235+
def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values):
236+
"""
237+
FIXME: Not fully implemented!!!!
238+
239+
Code is adapted from the cdap model in ActivitySim with the joint tour component
240+
Structure is pretty much in place, but dependencies need to be filtered out.
241+
"""
242+
243+
for row in joint_coef.itertuples():
244+
for aname, anum in alts.items():
245+
# only adding joint tour utility to alternatives with joint tours
246+
if "J" not in aname:
247+
continue
248+
expression = row.Expression
249+
dependency_name = row.dependency
250+
coefficient = row.coefficient
251+
252+
# dealing with dependencies
253+
if dependency_name in ["M_px", "N_px", "H_px"]:
254+
if "_pxprod" in expression:
255+
prod_conds = row.Expression.split("|")
256+
expanded_expressions = [
257+
tup
258+
for tup in itertools.product(
259+
range(len(prod_conds)), repeat=n_persons
260+
)
261+
]
262+
for expression_tup in expanded_expressions:
263+
expression_list = []
264+
dependency_list = []
265+
for counter in range(len(expression_tup)):
266+
expression_list.append(
267+
prod_conds[expression_tup[counter]].replace(
268+
"xprod", str(counter + 1)
269+
)
270+
)
271+
if expression_tup[counter] == 0:
272+
dependency_list.append(
273+
dependency_name.replace("x", str(counter + 1))
274+
)
275+
276+
expression_value = "&".join(expression_list)
277+
# FIXME only apply to alternative if dependency satisfied
278+
bug
279+
model.utility_co[anum] += X(expression_value) * P(coefficient)
280+
281+
elif "_px" in expression:
282+
for pnum in range(1, n_persons + 1):
283+
dependency_name = row.dependency.replace("x", str(pnum))
284+
expression = row.Expression.replace("x", str(pnum))
285+
# FIXME only apply to alternative if dependency satisfied
286+
bug
287+
model.utility_co[anum] += X(expression) * P(coefficient)
288+
289+
else:
290+
model.utility_co[anum] += X(expression) * P(coefficient)
291+
292+
293+
def cdap_split_data(households, values, add_joint):
226294
if "cdap_rank" not in values:
227295
raise ValueError("assign cdap_rank to values first")
228296
# only process the first 5 household members
229-
values = values[values.cdap_rank <= 5]
297+
values = values[values.cdap_rank <= MAX_HHSIZE]
230298
cdap_data = {}
231-
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)):
299+
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)):
232300
if hhsize == 1:
233301
v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
234302
"household_id"
@@ -241,16 +309,31 @@ def cdap_split_data(households, values):
241309
)
242310
v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
243311
for agglom in ["override_choice", "model_choice"]:
244-
v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
312+
v[agglom] = (
313+
v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
314+
.fillna("H")
315+
.sum(1)
316+
)
317+
if add_joint:
318+
joint_tour_indicator = (
319+
hhs_part.set_index("household_id")
320+
.reindex(v.index)
321+
.has_joint_tour
322+
)
323+
pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
324+
v[agglom] = np.where(
325+
joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
326+
)
245327
cdap_data[hhsize] = v
328+
246329
return cdap_data
247330

248331

249-
def cdap_dataframes(households, values):
250-
data = cdap_split_data(households, values)
332+
def cdap_dataframes(households, values, add_joint):
333+
data = cdap_split_data(households, values, add_joint)
251334
dfs = {}
252335
for hhsize in data.keys():
253-
alts = generate_alternatives(hhsize)
336+
alts = generate_alternatives(hhsize, add_joint)
254337
dfs[hhsize] = DataFrames(
255338
co=data[hhsize],
256339
alt_names=alts.keys(),
@@ -298,6 +381,7 @@ def cdap_data(
298381
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
299382
settings_file="{name}_model_settings.yaml",
300383
chooser_data_file="{name}_values_combined.csv",
384+
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
301385
):
302386
edb_directory = edb_directory.format(name=name)
303387
if not os.path.exists(edb_directory):
@@ -343,9 +427,28 @@ def read_yaml(filename, **kwargs):
343427
comment="#",
344428
)
345429

430+
try:
431+
joint_coef = read_csv(
432+
joint_coeffs_file,
433+
# dtype={"interaction_ptypes": str},
434+
# keep_default_na=False,
435+
comment="#",
436+
)
437+
add_joint = True
438+
except FileNotFoundError:
439+
joint_coef = None
440+
add_joint = False
441+
print("Including joint tour utiltiy?:", add_joint)
442+
346443
spec1 = read_csv(spec1_file, comment="#")
347444
values = read_csv(chooser_data_file, comment="#")
348-
values["cdap_rank"] = person_rank
445+
person_rank = cdap.assign_cdap_rank(
446+
persons[persons.household_id.isin(values.household_id)]
447+
.set_index("person_id")
448+
.reindex(values.person_id),
449+
person_type_map,
450+
)
451+
values["cdap_rank"] = person_rank.values
349452

350453
return Dict(
351454
edb_directory=Path(edb_directory),
@@ -355,6 +458,8 @@ def read_yaml(filename, **kwargs):
355458
coefficients=coefficients,
356459
households=hhs,
357460
settings=settings,
461+
joint_coef=joint_coef,
462+
add_joint=add_joint,
358463
)
359464

360465

@@ -367,6 +472,7 @@ def cdap_model(
367472
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
368473
settings_file="{name}_model_settings.yaml",
369474
chooser_data_file="{name}_values_combined.csv",
475+
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
370476
return_data=False,
371477
):
372478
d = cdap_data(
@@ -379,15 +485,17 @@ def cdap_model(
379485
spec1_file=spec1_file,
380486
settings_file=settings_file,
381487
chooser_data_file=chooser_data_file,
488+
joint_coeffs_file=joint_coeffs_file,
382489
)
383490

384491
households = d.households
385492
values = d.person_data
386493
spec1 = d.spec1
387494
interaction_coef = d.interaction_coef
388495
coefficients = d.coefficients
496+
add_joint = d.add_joint
389497

390-
cdap_dfs = cdap_dataframes(households, values)
498+
cdap_dfs = cdap_dataframes(households, values, add_joint)
391499
m = {}
392500
_logger.info(f"building for model 1")
393501
m[1] = Model(dataservice=cdap_dfs[1])
@@ -400,12 +508,15 @@ def cdap_model(
400508
interaction_coef["cardinality"] = interaction_coef[
401509
"interaction_ptypes"
402510
].str.len()
403-
for s in [2, 3, 4, 5]:
511+
for s in range(2, MAX_HHSIZE + 1):
512+
# for s in [2, 3, 4, 5]:
404513
_logger.info(f"building for model {s}")
405514
m[s] = Model(dataservice=cdap_dfs[s])
406-
alts = generate_alternatives(s)
515+
alts = generate_alternatives(s, add_joint)
407516
cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
408517
cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
518+
# if add_joint:
519+
# cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
409520
m[s].choice_any = True
410521
m[s].availability_any = True
411522

0 commit comments

Comments
 (0)