Skip to content

Commit

Permalink
BayDAG Contribution #3: CDAP estimation with joint component (#769)
Browse files Browse the repository at this point in the history
* CDAP estimation with joint component

* fixing person_rank call
  • Loading branch information
dhensle authored Apr 1, 2024
1 parent f15486c commit ade9aa4
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 17 deletions.
10 changes: 10 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ def cdap_simulate(
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.get_cached_spec(state, hhsize)
estimator.write_table(spec, "spec_%s" % hhsize, append=False)
if add_joint_tour_utility:
joint_spec = cdap.get_cached_joint_spec(hhsize)
estimator.write_table(
joint_spec, "joint_spec_%s" % hhsize, append=False
)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

Expand Down Expand Up @@ -215,6 +220,11 @@ def cdap_simulate(
if estimator:
estimator.write_choices(choices)
choices = estimator.get_survey_values(choices, "persons", "cdap_activity")
if add_joint_tour_utility:
hh_joint.index.name = "household_id"
hh_joint = estimator.get_survey_values(
hh_joint, "households", "has_joint_tour"
)
estimator.write_override_choices(choices)
estimator.end_estimation()

Expand Down
144 changes: 127 additions & 17 deletions activitysim/estimation/larch/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@

_logger = logging.getLogger(logger_name)

MAX_HHSIZE = 5

def generate_alternatives(n_persons):

def generate_alternatives(n_persons, add_joint=False):
"""
Generate a dictionary of CDAP alternatives.
Expand All @@ -41,8 +43,14 @@ def generate_alternatives(n_persons):
alt_names = list(
"".join(i) for i in itertools.product(basic_patterns, repeat=n_persons)
)
if add_joint:
pattern = r"[MN]"
joint_alts = [
alt + "J" for alt in alt_names if len(re.findall(pattern, alt)) >= 2
]
alt_names = alt_names + joint_alts
alt_codes = np.arange(1, len(alt_names) + 1)
return Dict(zip(alt_names, alt_codes))
return dict(zip(alt_names, alt_codes))


def apply_replacements(expression, prefix, tokens):
Expand All @@ -69,7 +77,9 @@ def apply_replacements(expression, prefix, tokens):
return expression


def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
def cdap_base_utility_by_person(
model, n_persons, spec, alts=None, value_tokens=(), add_joint=False
):
"""
Build the base utility by person for each pattern.
Expand Down Expand Up @@ -102,7 +112,7 @@ def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=
model.utility_co[3] += X(spec.Expression[i]) * P(spec.loc[i, "H"])
else:
if alts is None:
alts = generate_alternatives(n_persons)
alts = generate_alternatives(n_persons, add_joint)
person_numbers = range(1, n_persons + 1)
for pnum in person_numbers:
for i in spec.index:
Expand Down Expand Up @@ -222,13 +232,71 @@ def cdap_interaction_utility(model, n_persons, alts, interaction_coef, coefficie
model.utility_co[anum] += linear_component


def cdap_split_data(households, values):
def cdap_joint_tour_utility(model, n_persons, alts, joint_coef, values):
"""
FIXME: Not fully implemented!!!!
Code is adapted from the cdap model in ActivitySim with the joint tour component
Structure is pretty much in place, but dependencies need to be filtered out.
"""

for row in joint_coef.itertuples():
for aname, anum in alts.items():
# only adding joint tour utility to alternatives with joint tours
if "J" not in aname:
continue
expression = row.Expression
dependency_name = row.dependency
coefficient = row.coefficient

# dealing with dependencies
if dependency_name in ["M_px", "N_px", "H_px"]:
if "_pxprod" in expression:
prod_conds = row.Expression.split("|")
expanded_expressions = [
tup
for tup in itertools.product(
range(len(prod_conds)), repeat=n_persons
)
]
for expression_tup in expanded_expressions:
expression_list = []
dependency_list = []
for counter in range(len(expression_tup)):
expression_list.append(
prod_conds[expression_tup[counter]].replace(
"xprod", str(counter + 1)
)
)
if expression_tup[counter] == 0:
dependency_list.append(
dependency_name.replace("x", str(counter + 1))
)

expression_value = "&".join(expression_list)
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression_value) * P(coefficient)

elif "_px" in expression:
for pnum in range(1, n_persons + 1):
dependency_name = row.dependency.replace("x", str(pnum))
expression = row.Expression.replace("x", str(pnum))
# FIXME only apply to alternative if dependency satisfied
bug
model.utility_co[anum] += X(expression) * P(coefficient)

else:
model.utility_co[anum] += X(expression) * P(coefficient)


def cdap_split_data(households, values, add_joint):
if "cdap_rank" not in values:
raise ValueError("assign cdap_rank to values first")
# only process the first 5 household members
values = values[values.cdap_rank <= 5]
values = values[values.cdap_rank <= MAX_HHSIZE]
cdap_data = {}
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, 5)):
for hhsize, hhs_part in households.groupby(households.hhsize.clip(1, MAX_HHSIZE)):
if hhsize == 1:
v = pd.merge(values, hhs_part.household_id, on="household_id").set_index(
"household_id"
Expand All @@ -241,16 +309,31 @@ def cdap_split_data(households, values):
)
v.columns = [f"p{i[1]}_{i[0]}" for i in v.columns]
for agglom in ["override_choice", "model_choice"]:
v[agglom] = v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]].sum(1)
v[agglom] = (
v[[f"p{p}_{agglom}" for p in range(1, hhsize + 1)]]
.fillna("H")
.sum(1)
)
if add_joint:
joint_tour_indicator = (
hhs_part.set_index("household_id")
.reindex(v.index)
.has_joint_tour
)
pd.testing.assert_index_equal(v.index, joint_tour_indicator.index)
v[agglom] = np.where(
joint_tour_indicator == 1, v[agglom] + "J", v[agglom]
)
cdap_data[hhsize] = v

return cdap_data


def cdap_dataframes(households, values):
data = cdap_split_data(households, values)
def cdap_dataframes(households, values, add_joint):
data = cdap_split_data(households, values, add_joint)
dfs = {}
for hhsize in data.keys():
alts = generate_alternatives(hhsize)
alts = generate_alternatives(hhsize, add_joint)
dfs[hhsize] = DataFrames(
co=data[hhsize],
alt_names=alts.keys(),
Expand Down Expand Up @@ -298,6 +381,7 @@ def cdap_data(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
):
edb_directory = edb_directory.format(name=name)
if not os.path.exists(edb_directory):
Expand Down Expand Up @@ -328,8 +412,6 @@ def read_yaml(filename, **kwargs):
if person_type_map is None:
raise KeyError("PERSON_TYPE_MAP missing from cdap_settings.yaml")

person_rank = cdap.assign_cdap_rank(None, persons, person_type_map)

coefficients = read_csv(
coefficients_file,
index_col="coefficient_name",
Expand All @@ -343,9 +425,29 @@ def read_yaml(filename, **kwargs):
comment="#",
)

try:
joint_coef = read_csv(
joint_coeffs_file,
# dtype={"interaction_ptypes": str},
# keep_default_na=False,
comment="#",
)
add_joint = True
except FileNotFoundError:
joint_coef = None
add_joint = False
print("Including joint tour utiltiy?:", add_joint)

spec1 = read_csv(spec1_file, comment="#")
values = read_csv(chooser_data_file, comment="#")
values["cdap_rank"] = person_rank
person_rank = cdap.assign_cdap_rank(
None,
persons[persons.household_id.isin(values.household_id)]
.set_index("person_id")
.reindex(values.person_id),
person_type_map,
)
values["cdap_rank"] = person_rank.values

return Dict(
edb_directory=Path(edb_directory),
Expand All @@ -355,6 +457,8 @@ def read_yaml(filename, **kwargs):
coefficients=coefficients,
households=hhs,
settings=settings,
joint_coef=joint_coef,
add_joint=add_joint,
)


Expand All @@ -367,6 +471,7 @@ def cdap_model(
spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
settings_file="{name}_model_settings.yaml",
chooser_data_file="{name}_values_combined.csv",
joint_coeffs_file="{name}_joint_tour_coefficients.csv",
return_data=False,
):
d = cdap_data(
Expand All @@ -379,15 +484,17 @@ def cdap_model(
spec1_file=spec1_file,
settings_file=settings_file,
chooser_data_file=chooser_data_file,
joint_coeffs_file=joint_coeffs_file,
)

households = d.households
values = d.person_data
spec1 = d.spec1
interaction_coef = d.interaction_coef
coefficients = d.coefficients
add_joint = d.add_joint

cdap_dfs = cdap_dataframes(households, values)
cdap_dfs = cdap_dataframes(households, values, add_joint)
m = {}
_logger.info(f"building for model 1")
m[1] = Model(dataservice=cdap_dfs[1])
Expand All @@ -400,12 +507,15 @@ def cdap_model(
interaction_coef["cardinality"] = interaction_coef[
"interaction_ptypes"
].str.len()
for s in [2, 3, 4, 5]:
for s in range(2, MAX_HHSIZE + 1):
# for s in [2, 3, 4, 5]:
_logger.info(f"building for model {s}")
m[s] = Model(dataservice=cdap_dfs[s])
alts = generate_alternatives(s)
alts = generate_alternatives(s, add_joint)
cdap_base_utility_by_person(m[s], s, spec1, alts, values.columns)
cdap_interaction_utility(m[s], s, alts, interaction_coef, coefficients)
# if add_joint:
# cdap_joint_tour_utility(m[s], s, alts, d.joint_coef, values)
m[s].choice_any = True
m[s].availability_any = True

Expand Down

0 comments on commit ade9aa4

Please sign in to comment.