CDAP estimation with joint component

dhensle · dhensle · commit f252006a94ab · 2024-03-07T09:33:58.000-08:00
diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -181,6 +181,11 @@ def cdap_simulate(
         for hhsize in range(2, cdap.MAX_HHSIZE + 1):
             spec = cdap.get_cached_spec(state, hhsize)
             estimator.write_table(spec, "spec_%s" % hhsize, append=False)
+            if add_joint_tour_utility:
+                joint_spec = cdap.get_cached_joint_spec(hhsize)
+                estimator.write_table(
+                    joint_spec, "joint_spec_%s" % hhsize, append=False
+                )
 
     logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))
 
@@ -215,6 +220,11 @@ def cdap_simulate(
     if estimator:
         estimator.write_choices(choices)
         choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
+        if add_joint_tour_utility:
+            hh_joint.index.name = "household_id"
+            hh_joint = estimator.get_survey_values(
+                hh_joint, "households", "has_joint_tour"
+            )
         estimator.write_override_choices(choices)
         estimator.end_estimation()
 
diff --git a/activitysim/estimation/larch/cdap.py b/activitysim/estimation/larch/cdap.py
@@ -21,8 +21,10 @@
 
 _logger = logging.getLogger(logger_name)
 
+MAX_HHSIZE = 5
 
-def generate_alternatives(n_persons):
+
+def generate_alternatives(n_persons, add_joint=False):
     """
     Generate a dictionary of CDAP alternatives.
 
@@ -41,8 +43,14 @@ def generate_alternatives(n_persons):
     alt_names = list(
         "".join(i) for i in itertools.product(basic_patterns, repeat=n_persons)
     )
+    if add_joint:
+        pattern = r"[MN]"
+        joint_alts = [
+            alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2
+        ]
+        alt_names = alt_names + joint_alts
     alt_codes = np.arange(1, len(alt_names) + 1)
-    return Dict(zip(alt_names, alt_codes))
+    return dict(zip(alt_names, alt_codes))
 
 
 def apply_replacements(expression, prefix, tokens):
@@ -69,7 +77,9 @@ def apply_replacements(expression, prefix, tokens):
     return expression
 
 
-def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
+def cdap_base_utility_by_person(
+    model, n_persons, spec, alts=None, value_tokens=(), add_joint=False
+):
     """
     Build the base utility by person for each pattern.
 
@@ -102,7 +112,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=
                 model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
     else:
         if alts is None:
-            alts = generate_alternatives(n_persons)
+            alts = generate_alternatives(n_persons, add_joint)
         person_numbers = range(1, n_persons + 1)
         for pnum in person_numbers:
             for i in spec.index:
@@ -222,13 +232,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie
                                 model.utility_co[anum] += linear_component
 
 
-def cdap_split_data(households, values):
+def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values):
+    """
+    FIXME: Not fully implemented!!!!
+
+    Code is adapted from the cdap model in ActivitySim with the joint tour component
+    Structure is pretty much in place, but dependencies need to be filtered out.
+    """
+
+    for row in joint_coef.itertuples():
+        for aname, anum in alts.items():
+            # only adding joint tour utility to alternatives with joint tours
+            if "J" not in aname:
+                continue
+            expression = row.Expression
+            dependency_name = row.dependency
+            coefficient = row.coefficient
+
+            # dealing with dependencies
+            if dependency_name in ["M_px", "N_px", "H_px"]:
+                if "_pxprod" in expression:
+                    prod_conds = row.Expression.split("|")
+                    expanded_expressions = [
+                        tup
+                        for tup in itertools.product(
+                            range(len(prod_conds)), repeat=n_persons
+                        )
+                    ]
+                    for expression_tup in expanded_expressions:
+                        expression_list = []
+                        dependency_list = []
+                        for counter in range(len(expression_tup)):
+                            expression_list.append(
+                                prod_conds[expression_tup[counter]].replace(
+                                    "xprod", str(counter + 1)
+                                )
+                            )
+                            if expression_tup[counter] == 0:
+                                dependency_list.append(
+                                    dependency_name.replace("x", str(counter + 1))
+                                )
+
+                        expression_value = "&".join(expression_list)
+                        # FIXME only apply to alternative if dependency satisfied
+                        bug
+                        model.utility_co[anum] += X(expression_value) * P(coefficient)
+
+                elif "_px" in expression:
+                    for pnum in range(1, n_persons + 1):
+                        dependency_name = row.dependency.replace("x", str(pnum))
+                        expression = row.Expression.replace("x", str(pnum))
+                        # FIXME only apply to alternative if dependency satisfied
+                        bug
+                        model.utility_co[anum] += X(expression) * P(coefficient)
+
+            else:
+                model.utility_co[anum] += X(expression) * P(coefficient)
+
+
+def cdap_split_data(households, values, add_joint):
     if "cdap_rank" not in values:
         raise ValueError("assign cdap_rank to values first")
     # only process the first 5 household members
-    values = values[values.cdap_rank <= 5]
+    values = values[values.cdap_rank <= MAX_HHSIZE]
     cdap_data = {}
-    for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)):
+    for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)):
         if hhsize == 1:
             v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
                 "household_id"
@@ -241,16 +309,31 @@ def cdap_split_data(households, values):
             )
             v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
             for agglom in ["override_choice", "model_choice"]:
-                v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
+                v[agglom] = (
+                    v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
+                    .fillna("H")
+                    .sum(1)
+                )
+                if add_joint:
+                    joint_tour_indicator = (
+                        hhs_part.set_index("household_id")
+                        .reindex(v.index)
+                        .has_joint_tour
+                    )
+                    pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
+                    v[agglom] = np.where(
+                        joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
+                    )
         cdap_data[hhsize] = v
+
     return cdap_data
 
 
-def cdap_dataframes(households, values):
-    data = cdap_split_data(households, values)
+def cdap_dataframes(households, values, add_joint):
+    data = cdap_split_data(households, values, add_joint)
     dfs = {}
     for hhsize in data.keys():
-        alts = generate_alternatives(hhsize)
+        alts = generate_alternatives(hhsize, add_joint)
         dfs[hhsize] = DataFrames(
             co=data[hhsize],
             alt_names=alts.keys(),
@@ -298,6 +381,7 @@ def cdap_data(
     spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
     settings_file="{name}_model_settings.yaml",
     chooser_data_file="{name}_values_combined.csv",
+    joint_coeffs_file="{name}_joint_tour_coefficients.csv",
 ):
     edb_directory = edb_directory.format(name=name)
     if not os.path.exists(edb_directory):
@@ -343,9 +427,28 @@ def read_yaml(filename, **kwargs):
         comment="#",
     )
 
+    try:
+        joint_coef = read_csv(
+            joint_coeffs_file,
+            # dtype={"interaction_ptypes": str},
+            # keep_default_na=False,
+            comment="#",
+        )
+        add_joint = True
+    except FileNotFoundError:
+        joint_coef = None
+        add_joint = False
+    print("Including joint tour utiltiy?:", add_joint)
+
     spec1 = read_csv(spec1_file, comment="#")
     values = read_csv(chooser_data_file, comment="#")
-    values["cdap_rank"] = person_rank
+    person_rank = cdap.assign_cdap_rank(
+        persons[persons.household_id.isin(values.household_id)]
+        .set_index("person_id")
+        .reindex(values.person_id),
+        person_type_map,
+    )
+    values["cdap_rank"] = person_rank.values
 
     return Dict(
         edb_directory=Path(edb_directory),
@@ -355,6 +458,8 @@ def read_yaml(filename, **kwargs):
         coefficients=coefficients,
         households=hhs,
         settings=settings,
+        joint_coef=joint_coef,
+        add_joint=add_joint,
     )
 
 
@@ -367,6 +472,7 @@ def cdap_model(
     spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
     settings_file="{name}_model_settings.yaml",
     chooser_data_file="{name}_values_combined.csv",
+    joint_coeffs_file="{name}_joint_tour_coefficients.csv",
     return_data=False,
 ):
     d = cdap_data(
@@ -379,15 +485,17 @@ def cdap_model(
         spec1_file=spec1_file,
         settings_file=settings_file,
         chooser_data_file=chooser_data_file,
+        joint_coeffs_file=joint_coeffs_file,
     )
 
     households = d.households
     values = d.person_data
     spec1 = d.spec1
     interaction_coef = d.interaction_coef
     coefficients = d.coefficients
+    add_joint = d.add_joint
 
-    cdap_dfs = cdap_dataframes(households, values)
+    cdap_dfs = cdap_dataframes(households, values, add_joint)
     m = {}
     _logger.info(f"building for model 1")
     m[1] = Model(dataservice=cdap_dfs[1])
@@ -400,12 +508,15 @@ def cdap_model(
         interaction_coef["cardinality"] = interaction_coef[
             "interaction_ptypes"
         ].str.len()
-    for s in [2, 3, 4, 5]:
+    for s in range(2, MAX_HHSIZE + 1):
+        # for s in [2, 3, 4, 5]:
         _logger.info(f"building for model {s}")
         m[s] = Model(dataservice=cdap_dfs[s])
-        alts = generate_alternatives(s)
+        alts = generate_alternatives(s, add_joint)
         cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
         cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
+        # if add_joint:
+        #     cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
         m[s].choice_any = True
         m[s].availability_any = True