From f252006a94ab14acd65aab303be9a6274c7b981d Mon Sep 17 00:00:00 2001 From: David Hensle Date: Fri, 15 Dec 2023 16:35:37 -0800 Subject: [PATCH 1/2] CDAP estimation with joint component --- activitysim/abm/models/cdap.py | 10 ++ activitysim/estimation/larch/cdap.py | 141 ++++++++++++++++++++++++--- 2 files changed, 136 insertions(+), 15 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index e10763730..7eb082e4d 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -181,6 +181,11 @@ def cdap_simulate( for hhsize in range(2, cdap.MAX_HHSIZE + 1): spec = cdap.get_cached_spec(state, hhsize) estimator.write_table(spec, "spec_%s" % hhsize, append=False) + if add_joint_tour_utility: + joint_spec = cdap.get_cached_joint_spec(hhsize) + estimator.write_table( + joint_spec, "joint_spec_%s" % hhsize, append=False + ) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) @@ -215,6 +220,11 @@ def cdap_simulate( if estimator: estimator.write_choices(choices) choices = estimator.get_survey_values(choices, "persons", "cdap_activity") + if add_joint_tour_utility: + hh_joint.index.name = "household_id" + hh_joint = estimator.get_survey_values( + hh_joint, "households", "has_joint_tour" + ) estimator.write_override_choices(choices) estimator.end_estimation() diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py index 761cce05b..58cff46d7 100644 --- a/activitysim/estimation/larch/cdap.py +++ b/activitysim/estimation/larch/cdap.py @@ -21,8 +21,10 @@ _logger = logging.getLogger(logger_name) +MAX_HHSIZE = 5 -def generate_alternatives(n_persons): + +def generate_alternatives(n_persons, add_joint=False): """ Generate a dictionary of CDAP alternatives. @@ -41,8 +43,14 @@ def generate_alternatives(n_persons): alt_names = list( "".join(i) for i in itertools.product(basic_patterns, repeat=n_persons) ) + if add_joint: + pattern = r"[MN]" + joint_alts = [ + alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2 + ] + alt_names = alt_names + joint_alts alt_codes = np.arange(1, len(alt_names) + 1) - return Dict(zip(alt_names, alt_codes)) + return dict(zip(alt_names, alt_codes)) def apply_replacements(expression, prefix, tokens): @@ -69,7 +77,9 @@ def apply_replacements(expression, prefix, tokens): return expression -def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()): +def cdap_base_utility_by_person( + model, n_persons, spec, alts=None, value_tokens=(), add_joint=False +): """ Build the base utility by person for each pattern. @@ -102,7 +112,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens= model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"]) else: if alts is None: - alts = generate_alternatives(n_persons) + alts = generate_alternatives(n_persons, add_joint) person_numbers = range(1, n_persons + 1) for pnum in person_numbers: for i in spec.index: @@ -222,13 +232,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie model.utility_co[anum] += linear_component -def cdap_split_data(households, values): +def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values): + """ + FIXME: Not fully implemented!!!! + + Code is adapted from the cdap model in ActivitySim with the joint tour component + Structure is pretty much in place, but dependencies need to be filtered out. + """ + + for row in joint_coef.itertuples(): + for aname, anum in alts.items(): + # only adding joint tour utility to alternatives with joint tours + if "J" not in aname: + continue + expression = row.Expression + dependency_name = row.dependency + coefficient = row.coefficient + + # dealing with dependencies + if dependency_name in ["M_px", "N_px", "H_px"]: + if "_pxprod" in expression: + prod_conds = row.Expression.split("|") + expanded_expressions = [ + tup + for tup in itertools.product( + range(len(prod_conds)), repeat=n_persons + ) + ] + for expression_tup in expanded_expressions: + expression_list = [] + dependency_list = [] + for counter in range(len(expression_tup)): + expression_list.append( + prod_conds[expression_tup[counter]].replace( + "xprod", str(counter + 1) + ) + ) + if expression_tup[counter] == 0: + dependency_list.append( + dependency_name.replace("x", str(counter + 1)) + ) + + expression_value = "&".join(expression_list) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression_value) * P(coefficient) + + elif "_px" in expression: + for pnum in range(1, n_persons + 1): + dependency_name = row.dependency.replace("x", str(pnum)) + expression = row.Expression.replace("x", str(pnum)) + # FIXME only apply to alternative if dependency satisfied + bug + model.utility_co[anum] += X(expression) * P(coefficient) + + else: + model.utility_co[anum] += X(expression) * P(coefficient) + + +def cdap_split_data(households, values, add_joint): if "cdap_rank" not in values: raise ValueError("assign cdap_rank to values first") # only process the first 5 household members - values = values[values.cdap_rank <= 5] + values = values[values.cdap_rank <= MAX_HHSIZE] cdap_data = {} - for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)): + for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)): if hhsize == 1: v = pd.merge(values, hhs_part.household_id, on="household_id").set_index( "household_id" @@ -241,16 +309,31 @@ def cdap_split_data(households, values): ) v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns] for agglom in ["override_choice", "model_choice"]: - v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1) + v[agglom] = ( + v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]] + .fillna("H") + .sum(1) + ) + if add_joint: + joint_tour_indicator = ( + hhs_part.set_index("household_id") + .reindex(v.index) + .has_joint_tour + ) + pd.testing.assert_index_equal(v.index, joint_tour_indicator.index) + v[agglom] = np.where( + joint_tour_indicator == 1, v[agglom] + "J", v[agglom] + ) cdap_data[hhsize] = v + return cdap_data -def cdap_dataframes(households, values): - data = cdap_split_data(households, values) +def cdap_dataframes(households, values, add_joint): + data = cdap_split_data(households, values, add_joint) dfs = {} for hhsize in data.keys(): - alts = generate_alternatives(hhsize) + alts = generate_alternatives(hhsize, add_joint) dfs[hhsize] = DataFrames( co=data[hhsize], alt_names=alts.keys(), @@ -298,6 +381,7 @@ def cdap_data( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", ): edb_directory = edb_directory.format(name=name) if not os.path.exists(edb_directory): @@ -343,9 +427,28 @@ def read_yaml(filename, **kwargs): comment="#", ) + try: + joint_coef = read_csv( + joint_coeffs_file, + # dtype={"interaction_ptypes": str}, + # keep_default_na=False, + comment="#", + ) + add_joint = True + except FileNotFoundError: + joint_coef = None + add_joint = False + print("Including joint tour utiltiy?:", add_joint) + spec1 = read_csv(spec1_file, comment="#") values = read_csv(chooser_data_file, comment="#") - values["cdap_rank"] = person_rank + person_rank = cdap.assign_cdap_rank( + persons[persons.household_id.isin(values.household_id)] + .set_index("person_id") + .reindex(values.person_id), + person_type_map, + ) + values["cdap_rank"] = person_rank.values return Dict( edb_directory=Path(edb_directory), @@ -355,6 +458,8 @@ def read_yaml(filename, **kwargs): coefficients=coefficients, households=hhs, settings=settings, + joint_coef=joint_coef, + add_joint=add_joint, ) @@ -367,6 +472,7 @@ def cdap_model( spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv", settings_file="{name}_model_settings.yaml", chooser_data_file="{name}_values_combined.csv", + joint_coeffs_file="{name}_joint_tour_coefficients.csv", return_data=False, ): d = cdap_data( @@ -379,6 +485,7 @@ def cdap_model( spec1_file=spec1_file, settings_file=settings_file, chooser_data_file=chooser_data_file, + joint_coeffs_file=joint_coeffs_file, ) households = d.households @@ -386,8 +493,9 @@ def cdap_model( spec1 = d.spec1 interaction_coef = d.interaction_coef coefficients = d.coefficients + add_joint = d.add_joint - cdap_dfs = cdap_dataframes(households, values) + cdap_dfs = cdap_dataframes(households, values, add_joint) m = {} _logger.info(f"building for model 1") m[1] = Model(dataservice=cdap_dfs[1]) @@ -400,12 +508,15 @@ def cdap_model( interaction_coef["cardinality"] = interaction_coef[ "interaction_ptypes" ].str.len() - for s in [2, 3, 4, 5]: + for s in range(2, MAX_HHSIZE + 1): + # for s in [2, 3, 4, 5]: _logger.info(f"building for model {s}") m[s] = Model(dataservice=cdap_dfs[s]) - alts = generate_alternatives(s) + alts = generate_alternatives(s, add_joint) cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns) cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients) + # if add_joint: + # cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values) m[s].choice_any = True m[s].availability_any = True From d05f77c76602d41c28041776865f12cbb536a6c4 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 11 Mar 2024 15:00:28 -0700 Subject: [PATCH 2/2] fixing person_rank call --- activitysim/estimation/larch/cdap.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py index 58cff46d7..0f0f41218 100644 --- a/activitysim/estimation/larch/cdap.py +++ b/activitysim/estimation/larch/cdap.py @@ -412,8 +412,6 @@ def read_yaml(filename, **kwargs): if person_type_map is None: raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml") - person_rank = cdap.assign_cdap_rank(None, persons, person_type_map) - coefficients = read_csv( coefficients_file, index_col="coefficient_name", @@ -443,6 +441,7 @@ def read_yaml(filename, **kwargs): spec1 = read_csv(spec1_file, comment="#") values = read_csv(chooser_data_file, comment="#") person_rank = cdap.assign_cdap_rank( + None, persons[persons.household_id.isin(values.household_id)] .set_index("person_id") .reindex(values.person_id),