From 7238f16b07dd651179ec71c90ac4a42faeb3f7ff Mon Sep 17 00:00:00 2001 From: Ali Etezady <58451076+aletzdy@users.noreply.github.com> Date: Tue, 15 Apr 2025 08:27:00 -0700 Subject: [PATCH 1/9] initial batch of models with pre and post processing added --- activitysim/abm/models/cdap.py | 44 ++++++++++++++----- activitysim/abm/models/free_parking.py | 12 +++++ .../abm/models/joint_tour_composition.py | 16 +++++++ .../abm/models/joint_tour_destination.py | 4 +- .../joint_tour_frequency_composition.py | 11 +++++ .../abm/models/joint_tour_scheduling.py | 11 +++++ .../abm/models/transit_pass_ownership.py | 12 +++++ .../abm/models/util/tour_destination.py | 23 +++++++++- .../abm/models/util/tour_scheduling.py | 11 +++++ activitysim/abm/models/work_from_home.py | 12 +++++ 10 files changed, 142 insertions(+), 14 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 6776c06c7f..3d5fc62b16 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -24,6 +24,8 @@ ) from activitysim.core.util import reindex +from .util import annotate + logger = logging.getLogger(__name__) @@ -141,6 +143,21 @@ def cdap_simulate( cdap_interaction_coefficients ) + # - preprocessor + preprocessor_settings = model_settings.preprocessor + if preprocessor_settings: + locals_d = {} + if constants is not None: + locals_d.update(constants) + + expressions.assign_columns( + state, + df=persons_merged, + model_settings=preprocessor_settings, + locals_dict=locals_d, + trace_label=trace_label, + ) + # specs are built just-in-time on demand and cached as injectables # prebuilding here allows us to write them to the output directory # (also when multiprocessing locutor might not see all household sizes) @@ -242,12 +259,13 @@ def cdap_simulate( choices = choices.astype(cap_cat_type) persons["cdap_activity"] = choices - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - 
trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) + if model_settings.annotate_persons: + expressions.assign_columns( + state, + df=persons, + model_settings=model_settings.annotate_persons, + trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), + ) state.add_table("persons", persons) @@ -256,12 +274,14 @@ def cdap_simulate( hh_joint = hh_joint.reindex(households.index) households["has_joint_tour"] = hh_joint - expressions.assign_columns( - state, - df=households, - model_settings=model_settings.annotate_households, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) + if model_settings.annotate_households: + expressions.assign_columns( + state, + df=households, + model_settings=model_settings.annotate_households, + trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), + ) + state.add_table("households", households) tracing.print_summary("cdap_activity", persons.cdap_activity, value_counts=True) diff --git a/activitysim/abm/models/free_parking.py b/activitysim/abm/models/free_parking.py index 9aa2800a67..44d8fc8366 100644 --- a/activitysim/abm/models/free_parking.py +++ b/activitysim/abm/models/free_parking.py @@ -18,6 +18,8 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings +from .util import annotate + logger = logging.getLogger(__name__) @@ -29,6 +31,10 @@ class FreeParkingSettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + annotate_households: PreprocessorSettings | None = None + + annotate_persons: PreprocessorSettings | None = None + FREE_PARKING_ALT: int """The code for free parking.""" @@ -144,3 +150,9 @@ def free_parking( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + if model_settings.annotate_households: + 
annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index ee4ac3a69c..6e2aef0934 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ b/activitysim/abm/models/joint_tour_composition.py @@ -18,6 +18,8 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings +from .util import annotate + logger = logging.getLogger(__name__) @@ -39,6 +41,11 @@ class JointTourCompositionSettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + annotate_households: PreprocessorSettings | None = None + + annotate_persons: PreprocessorSettings | None = None + + annotate_tours: PreprocessorSettings | None = None @workflow.step def joint_tour_composition( @@ -156,3 +163,12 @@ def joint_tour_composition( label="joint_tour_composition.joint_tours", slicer="household_id", ) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, model_settings, trace_label) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index cd6c2fed01..c6529a42cf 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -11,6 +11,8 @@ from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place +from .util import annotate + logger = logging.getLogger(__name__) @@ -112,4 +114,4 @@ def joint_tour_destination( 
state.extend_table(sample_table_name, save_sample_df) if trace_hh_id: - state.tracing.trace_df(joint_tours, label="joint_tour_destination.joint_tours") + state.tracing.trace_df(joint_tours, label="joint_tour_destination.joint_tours") \ No newline at end of file diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index 553b280fe7..ad65344bb7 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -22,6 +22,8 @@ ) from activitysim.core.interaction_simulate import interaction_simulate +from .util import annotate + logger = logging.getLogger(__name__) @@ -221,3 +223,12 @@ def joint_tour_frequency_composition( label="joint_tour_frequency_composition.joint_tours", slicer="household_id", ) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, model_settings, trace_label) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) \ No newline at end of file diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py index 9bdcbe146f..8c7fd054d9 100644 --- a/activitysim/abm/models/joint_tour_scheduling.py +++ b/activitysim/abm/models/joint_tour_scheduling.py @@ -22,6 +22,8 @@ from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place, reindex +from .util import annotate + logger = logging.getLogger(__name__) @@ -172,3 +174,12 @@ def joint_tour_scheduling( state.tracing.trace_df( joint_tours, label="joint_tour_scheduling", slicer="household_id" ) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, 
model_settings, trace_label) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, model_settings, trace_label) diff --git a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 9a34b7b0b4..50568182e4 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -17,6 +17,8 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings +from .util import annotate + logger = logging.getLogger("activitysim") @@ -28,6 +30,10 @@ class TransitPassOwnershipSettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + annotate_households: PreprocessorSettings | None = None + + annotate_persons: PreprocessorSettings | None = None + @workflow.step def transit_pass_ownership( @@ -114,3 +120,9 @@ def transit_pass_ownership( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 0891b8d216..1770fddf5d 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -9,12 +9,14 @@ from activitysim.abm.models.util import logsums as logsum from activitysim.abm.tables.size_terms import tour_destination_size_terms -from activitysim.core import config, los, simulate, tracing, workflow +from activitysim.core import config, los, simulate, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from 
activitysim.core.interaction_sample import interaction_sample from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex +import annotate + logger = logging.getLogger(__name__) DUMP = False @@ -761,6 +763,16 @@ def run_destination_simulate( if constants is not None: locals_d.update(constants) + preprocessor_settings = model_settings.get('preprocessor', None) + if preprocessor_settings: + expressions.assign_columns( + state, + df=choosers, + model_settings=preprocessor_settings, + locals_dict=locals_d, + trace_label=trace_label, + ) + state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") log_alt_losers = state.settings.log_alt_losers @@ -789,6 +801,15 @@ def run_destination_simulate( assert isinstance(choices, pd.Series) choices = choices.to_frame("choice") + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, model_settings, trace_label) + return choices diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index db003786f4..e33b3cedbb 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -11,6 +11,8 @@ from .vectorize_tour_scheduling import TourModeComponentSettings, TourSchedulingSettings +import annotate + logger = logging.getLogger(__name__) @@ -195,4 +197,13 @@ def run_tour_scheduling( choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left" ) + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, 
model_settings, trace_label) + return choices diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 8b96dafa13..73a864c825 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -18,6 +18,8 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings +from .util import annotate + logger = logging.getLogger("activitysim") @@ -29,6 +31,10 @@ class WorkFromHomeSettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + annotate_households: PreprocessorSettings | None = None + + annotate_persons: PreprocessorSettings | None = None + WORK_FROM_HOME_ALT: int """Value that specify if the person is working from home""" # TODO @@ -221,3 +227,9 @@ def work_from_home( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) \ No newline at end of file From 388219e8b41ddbab024b1c8cd84ee88233983e02 Mon Sep 17 00:00:00 2001 From: Ali Etezady <58451076+aletzdy@users.noreply.github.com> Date: Tue, 15 Apr 2025 08:36:38 -0700 Subject: [PATCH 2/9] nonmand sched and transit pass models --- activitysim/abm/models/non_mandatory_scheduling.py | 9 +++++++++ activitysim/abm/models/transit_pass_subsidy.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/activitysim/abm/models/non_mandatory_scheduling.py b/activitysim/abm/models/non_mandatory_scheduling.py index 4e444107bc..d6670ad86f 100644 --- a/activitysim/abm/models/non_mandatory_scheduling.py +++ b/activitysim/abm/models/non_mandatory_scheduling.py @@ -71,3 +71,12 @@ def 
non_mandatory_tour_scheduling( columns=None, warn_if_empty=True, ) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) + + if model_settings.annotate_tours: + annotate.annotate_tours(state, model_settings, trace_label) diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index 0f71279cdf..b162230925 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -17,6 +17,8 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings +from .util import annotate + logger = logging.getLogger("activitysim") @@ -28,6 +30,10 @@ class TransitPassSubsidySettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + annotate_households: PreprocessorSettings | None = None + + annotate_persons: PreprocessorSettings | None = None + CHOOSER_FILTER_COLUMN_NAME: str | None = None """Column name which selects choosers. 
If None, all persons are choosers.""" @@ -122,3 +128,9 @@ def transit_pass_subsidy( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + if model_settings.annotate_households: + annotate.annotate_households(state, model_settings, trace_label) + + if model_settings.annotate_persons: + annotate.annotate_persons(state, model_settings, trace_label) From 79070694c4bb49a6bd5186c44691931c04620d04 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 16 Jun 2025 18:55:13 -0700 Subject: [PATCH 3/9] first pass at preprocessing and annotate functionality in all models --- .../abm/models/atwork_subtour_destination.py | 10 +- .../abm/models/atwork_subtour_frequency.py | 30 ++-- .../abm/models/atwork_subtour_mode_choice.py | 19 +-- .../abm/models/atwork_subtour_scheduling.py | 8 + activitysim/abm/models/auto_ownership.py | 39 +++-- activitysim/abm/models/cdap.py | 60 +++----- activitysim/abm/models/free_parking.py | 45 ++---- .../abm/models/joint_tour_composition.py | 62 ++++---- .../abm/models/joint_tour_destination.py | 13 +- .../abm/models/joint_tour_frequency.py | 45 +++--- .../joint_tour_frequency_composition.py | 73 +++++---- .../abm/models/joint_tour_participation.py | 75 ++++------ .../abm/models/joint_tour_scheduling.py | 54 +++---- activitysim/abm/models/location_choice.py | 77 ++++++---- .../abm/models/mandatory_scheduling.py | 20 ++- .../abm/models/mandatory_tour_frequency.py | 38 +++-- .../abm/models/non_mandatory_destination.py | 15 +- .../abm/models/non_mandatory_scheduling.py | 28 ++-- .../models/non_mandatory_tour_frequency.py | 80 +++++----- .../abm/models/parking_location_choice.py | 63 ++++++-- activitysim/abm/models/school_escorting.py | 42 ++++-- activitysim/abm/models/stop_frequency.py | 24 ++- .../abm/models/telecommute_frequency.py | 31 ++-- activitysim/abm/models/tour_mode_choice.py | 25 +++- activitysim/abm/models/tour_od_choice.py | 10 +- .../models/tour_scheduling_probabilistic.py | 8 + 
.../abm/models/transit_pass_ownership.py | 42 +++--- .../abm/models/transit_pass_subsidy.py | 43 ++---- .../abm/models/trip_departure_choice.py | 33 ++++- activitysim/abm/models/trip_destination.py | 65 ++++---- activitysim/abm/models/trip_mode_choice.py | 35 +++-- activitysim/abm/models/trip_purpose.py | 26 ++-- .../models/trip_purpose_and_destination.py | 10 +- activitysim/abm/models/trip_scheduling.py | 25 ++-- .../abm/models/trip_scheduling_choice.py | 49 ++++-- activitysim/abm/models/util/annotate.py | 136 ----------------- .../abm/models/util/tour_destination.py | 61 +++++--- activitysim/abm/models/util/tour_frequency.py | 3 +- activitysim/abm/models/util/tour_od.py | 90 ++++------- .../abm/models/util/tour_scheduling.py | 53 +++---- .../models/util/vectorize_tour_scheduling.py | 2 - activitysim/abm/models/vehicle_allocation.py | 40 +++-- activitysim/abm/models/vehicle_type_choice.py | 58 ++++---- activitysim/abm/models/work_from_home.py | 42 ++---- activitysim/core/configuration/logit.py | 31 +++- activitysim/core/expressions.py | 140 ++++++++++++++++-- activitysim/core/util.py | 6 +- 47 files changed, 1047 insertions(+), 937 deletions(-) delete mode 100644 activitysim/abm/models/util/annotate.py diff --git a/activitysim/abm/models/atwork_subtour_destination.py b/activitysim/abm/models/atwork_subtour_destination.py index d0d7fd9916..1c313a0896 100644 --- a/activitysim/abm/models/atwork_subtour_destination.py +++ b/activitysim/abm/models/atwork_subtour_destination.py @@ -7,7 +7,7 @@ import pandas as pd from activitysim.abm.models.util import tour_destination -from activitysim.core import config, estimation, los, tracing, workflow +from activitysim.core import config, estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place @@ -120,3 +120,11 @@ def atwork_subtour_destination( state.tracing.trace_df( tours, label="atwork_subtour_destination", 
columns=["destination"] ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py index 3483554432..ca7ae6d259 100644 --- a/activitysim/abm/models/atwork_subtour_frequency.py +++ b/activitysim/abm/models/atwork_subtour_frequency.py @@ -37,8 +37,8 @@ class AtworkSubtourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `atwork_subtour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + # no additional fields are required for this component + pass @workflow.step @@ -92,15 +92,15 @@ def atwork_subtour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=work_tours, - model_settings=preprocessor_settings, - trace_label=trace_label, - ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=work_tours, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if estimator: estimator.write_spec(model_settings) @@ -164,3 +164,11 @@ def atwork_subtour_frequency( if trace_hh_id: state.tracing.trace_df(tours, label="atwork_subtour_frequency.tours") + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_mode_choice.py b/activitysim/abm/models/atwork_subtour_mode_choice.py index 4e1949dc06..b9889aee72 100644 --- a/activitysim/abm/models/atwork_subtour_mode_choice.py +++ b/activitysim/abm/models/atwork_subtour_mode_choice.py @@ -195,17 +195,6 @@ def atwork_subtour_mode_choice( 
) state.add_table("tours", tours) - # - annotate tours table - if model_settings.annotate_tours: - tours = state.get_dataframe("tours") - expressions.assign_columns( - state, - df=tours, - model_settings=model_settings.annotate_tours, - trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), - ) - state.add_table("tours", tours) - if trace_hh_id: state.tracing.trace_df( tours[tours.tour_category == "atwork"], @@ -213,3 +202,11 @@ def atwork_subtour_mode_choice( slicer="tour_id", index_label="tour_id", ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index fff94ef30f..2ad67ff22a 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -141,3 +141,11 @@ def atwork_subtour_scheduling( trace_label, "tour_map", ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index a66ce763a7..fe6a472abb 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -18,8 +18,6 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate - logger = logging.getLogger(__name__) @@ -28,8 +26,8 @@ class AutoOwnershipSettings(LogitComponentSettings, extra="forbid"): Settings for the `auto_ownership` component. 
""" - preprocessor: PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None + # no additional fields are required for this component + pass @workflow.step @@ -69,20 +67,14 @@ def auto_ownership_simulate( logger.info("Running %s with %d households", trace_label, len(choosers)) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) @@ -120,8 +112,13 @@ def auto_ownership_simulate( "auto_ownership", households.auto_ownership, value_counts=True ) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - if trace_hh_id: state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 3d5fc62b16..2ade0b3454 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -24,8 +24,6 @@ ) from activitysim.core.util import reindex -from .util import annotate - logger = logging.getLogger(__name__) @@ -38,12 +36,16 @@ class CdapSettings(PydanticReadable, extra="forbid"): JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" JOINT_TOUR_USEFUL_COLUMNS: list[str] | None = None """Columns to include from the persons table that will be need to calculate household joint tour utility.""" - annotate_persons: 
PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path CONSTANTS: dict[str, Any] = {} compute_settings: ComputeSettings | None = None + preprocessor: PreprocessorSettings | None = None + """Preprocess choosers tables before running the model.""" + annotate_persons: PreprocessorSettings | None = None + annotate_households: PreprocessorSettings | None = None + """Postprocess tables after model completion.""" + @workflow.step def cdap_simulate( @@ -143,21 +145,6 @@ def cdap_simulate( cdap_interaction_coefficients ) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=persons_merged, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) - # specs are built just-in-time on demand and cached as injectables # prebuilding here allows us to write them to the output directory # (also when multiprocessing locutor might not see all household sizes) @@ -188,6 +175,16 @@ def cdap_simulate( index=True, ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=persons_merged, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, "cdap.yaml") estimator.write_spec(model_settings, tag="INDIV_AND_HHSIZE1_SPEC") @@ -258,15 +255,6 @@ def cdap_simulate( cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False) choices = choices.astype(cap_cat_type) persons["cdap_activity"] = choices - - if model_settings.annotate_persons: - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) # - annotate households table @@ 
-274,14 +262,6 @@ def cdap_simulate( hh_joint = hh_joint.reindex(households.index) households["has_joint_tour"] = hh_joint - if model_settings.annotate_households: - expressions.assign_columns( - state, - df=households, - model_settings=model_settings.annotate_households, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) - state.add_table("households", households) tracing.print_summary("cdap_activity", persons.cdap_activity, value_counts=True) @@ -289,3 +269,11 @@ def cdap_simulate( "cdap crosstabs:\n%s" % pd.crosstab(persons.ptype, persons.cdap_activity, margins=True) ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/free_parking.py b/activitysim/abm/models/free_parking.py index 44d8fc8366..ebcecd248a 100644 --- a/activitysim/abm/models/free_parking.py +++ b/activitysim/abm/models/free_parking.py @@ -18,8 +18,6 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate - logger = logging.getLogger(__name__) @@ -28,13 +26,6 @@ class FreeParkingSettings(LogitComponentSettings, extra="forbid"): Settings for the `free_parking` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_households: PreprocessorSettings | None = None - - annotate_persons: PreprocessorSettings | None = None - FREE_PARKING_ALT: int """The code for free parking.""" @@ -84,21 +75,6 @@ def free_parking( constants = model_settings.CONSTANTS or {} - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( @@ -107,6 +83,15 @@ def free_parking( nest_spec = config.get_logit_model_settings(model_settings) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(file_name=model_settings.SPEC) @@ -151,8 +136,10 @@ def free_parking( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index 6e2aef0934..60de227aa1 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ 
b/activitysim/abm/models/joint_tour_composition.py @@ -18,7 +18,6 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate logger = logging.getLogger(__name__) @@ -38,14 +37,8 @@ class JointTourCompositionSettings(LogitComponentSettings, extra="forbid"): Settings for the `joint_tour_composition` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + pass - annotate_households: PreprocessorSettings | None = None - - annotate_persons: PreprocessorSettings | None = None - - annotate_tours: PreprocessorSettings | None = None @workflow.step def joint_tour_composition( @@ -85,26 +78,6 @@ def joint_tour_composition( "Running joint_tour_composition with %d joint tours" % joint_tours.shape[0] ) - # - run preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=households, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - - joint_tours_merged = pd.merge( - joint_tours, households, left_on="household_id", right_index=True, how="left" - ) - # - simple_simulate model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -115,6 +88,24 @@ def joint_tour_composition( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + locals_dict = { + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=households, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + 
joint_tours_merged = pd.merge( + joint_tours, households, left_on="household_id", right_index=True, how="left" + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) @@ -164,11 +155,10 @@ def joint_tour_composition( slicer="household_id", ) - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py index c6529a42cf..29b117821b 100644 --- a/activitysim/abm/models/joint_tour_destination.py +++ b/activitysim/abm/models/joint_tour_destination.py @@ -7,11 +7,10 @@ import pandas as pd from activitysim.abm.models.util import tour_destination -from activitysim.core import estimation, los, tracing, workflow +from activitysim.core import estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place -from .util import annotate logger = logging.getLogger(__name__) @@ -114,4 +113,12 @@ def joint_tour_destination( state.extend_table(sample_table_name, save_sample_df) if trace_hh_id: - state.tracing.trace_df(joint_tours, label="joint_tour_destination.joint_tours") \ No newline at end of file + state.tracing.trace_df(joint_tours, label="joint_tour_destination.joint_tours") + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_frequency.py 
b/activitysim/abm/models/joint_tour_frequency.py index 1700c143b0..7db1087d21 100644 --- a/activitysim/abm/models/joint_tour_frequency.py +++ b/activitysim/abm/models/joint_tour_frequency.py @@ -25,11 +25,11 @@ class JointTourFrequencySettings(LogitComponentSettings, extra="forbid"): """ - Settings for the `free_parking` component. + Settings for the `joint_tour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + # no additional settings are required for this model + pass @workflow.step @@ -72,22 +72,6 @@ def joint_tour_frequency( % multi_person_households.shape[0] ) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=multi_person_households, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( @@ -97,6 +81,21 @@ def joint_tour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # - preprocess choosers table + locals_dict = { + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=multi_person_households, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) @@ -205,3 +204,11 @@ def joint_tour_frequency( print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}") different = True assert not 
different + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index ad65344bb7..b9f801eb5e 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -22,7 +22,6 @@ ) from activitysim.core.interaction_simulate import interaction_simulate -from .util import annotate logger = logging.getLogger(__name__) @@ -60,37 +59,6 @@ def joint_tour_frequency_composition( logger.info("Running %s with %d households", trace_label, len(choosers)) - # alt preprocessor - alt_preprocessor_settings = model_settings.ALTS_PREPROCESSOR - if alt_preprocessor_settings: - locals_dict = {} - - alts = alts.copy() - - expressions.assign_columns( - state, - df=alts, - model_settings=alt_preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "persons": persons, - "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), - } - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - estimator = estimation.manager.begin_estimation( state, "joint_tour_frequency_composition" ) @@ -103,6 +71,32 @@ def joint_tour_frequency_composition( constants = config.get_model_constants(model_settings) + # preprocess choosers table + locals_dict = { + "persons": persons, + "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), + } + locals_dict.update(constants) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # 
preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alts, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="ALTS_PREPROCESSOR", + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) @@ -224,11 +218,10 @@ def joint_tour_frequency_composition( slicer="household_id", ) - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) \ No newline at end of file + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 55d5367b3c..4e834fc628 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -274,22 +274,6 @@ def participants_chooser( return choices, rands -def annotate_jtp( - state: workflow.State, - model_settings: JointTourParticipationSettings, - trace_label: str, -): - # - annotate persons - persons = state.get_dataframe("persons") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) - - def add_null_results( state: workflow.State, model_settings: JointTourParticipationSettings, @@ -305,7 +289,13 @@ def add_null_results( state.add_table("joint_tour_participants", participants) # - run annotations - annotate_jtp(state, model_settings, trace_label) + expressions.annotate_tables( + state, + 
locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) class JointTourParticipationSettings(LogitComponentSettings, extra="forbid"): @@ -313,12 +303,6 @@ class JointTourParticipationSettings(LogitComponentSettings, extra="forbid"): Settings for the `joint_tour_participation` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_persons: PreprocessorSettings | None = None - """Instructions for annotating the persons table.""" - participation_choice: str = "participate" max_participation_choice_iterations: int = 5000 @@ -362,25 +346,6 @@ def joint_tour_participation( "Running joint_tours_participation with %d potential participants (candidates)" % candidates.shape[0] ) - - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "person_time_window_overlap": lambda x: person_time_window_overlap( - state, x - ), - "persons": persons_merged, - } - - expressions.assign_columns( - state, - df=candidates, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - # - simple_simulate estimator = estimation.manager.begin_estimation(state, "joint_tour_participation") @@ -394,6 +359,21 @@ def joint_tour_participation( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # preprocess choosers table + locals_dict = { + "persons": persons_merged, + "person_time_window_overlap": lambda x: person_time_window_overlap(state, x), + } + locals_dict.update(constants) + expressions.annotate_preprocessors( + state, + df=candidates, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) @@ -498,9 +478,6 @@ def joint_tour_participation( 
state.add_table("tours", tours) - # - run annotations - annotate_jtp(state, model_settings, trace_label) - if trace_hh_id: state.tracing.trace_df( participants, label="joint_tour_participation.participants" @@ -509,3 +486,11 @@ def joint_tour_participation( state.tracing.trace_df( joint_tours, label="joint_tour_participation.joint_tours" ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py index 8c7fd054d9..791dd7aaf4 100644 --- a/activitysim/abm/models/joint_tour_scheduling.py +++ b/activitysim/abm/models/joint_tour_scheduling.py @@ -22,24 +22,10 @@ from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.util import assign_in_place, reindex -from .util import annotate logger = logging.getLogger(__name__) -# class JointTourSchedulingSettings(LogitComponentSettings, extra="forbid"): -# """ -# Settings for the `joint_tour_scheduling` component. 
-# """ -# -# preprocessor: PreprocessorSettings | None = None -# """Setting for the preprocessor.""" -# -# sharrow_skip: bool = False -# """Setting to skip sharrow""" -# - - @workflow.step def joint_tour_scheduling( state: workflow.State, @@ -87,21 +73,18 @@ def joint_tour_scheduling( constants = config.get_model_constants(model_settings) # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=joint_tours, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) - timetable = state.get_injectable("timetable") + locals_d = {"timetable": timetable} + locals_d.update(constants) + + expressions.annotate_preprocessors( + state, + df=joint_tours, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) estimator = estimation.manager.begin_estimation(state, "joint_tour_scheduling") @@ -175,11 +158,10 @@ def joint_tour_scheduling( joint_tours, label="joint_tour_scheduling", slicer="household_id" ) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) - - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 83e794b2be..f634bb3fdb 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -163,6 +163,27 @@ def _location_sample( } locals_d.update(model_settings.CONSTANTS or {}) + # preprocess choosers table + 
expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + spec = simulate.spec_for_segment( state, None, @@ -664,6 +685,27 @@ def run_location_simulate( } locals_d.update(model_settings.CONSTANTS or {}) + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=None, # skims included in locals_d + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + if estimator: # write choosers after annotation estimator.write_choosers(choosers) @@ -1117,33 +1159,18 @@ def iterate_location_choice( ) state.extend_table(sample_table_name, save_sample_df) - # - annotate persons table - if model_settings.annotate_persons: - expressions.assign_columns( - state, - df=persons_df, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - - state.add_table("persons", persons_df) + state.add_table("persons", persons_df) - if state.settings.trace_hh_id: - state.tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True) + if state.settings.trace_hh_id: + state.tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True) - # - annotate households table - if model_settings.annotate_households: - households_df = households - expressions.assign_columns( - state, - df=households_df, - 
model_settings=model_settings.annotate_households, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) - state.add_table("households", households_df) - - if state.settings.trace_hh_id: - state.tracing.trace_df(households_df, label=trace_label, warn_if_empty=True) + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if dc_logsum_column_name: tracing.print_summary( diff --git a/activitysim/abm/models/mandatory_scheduling.py b/activitysim/abm/models/mandatory_scheduling.py index 64fc26215f..a8cb46ecca 100644 --- a/activitysim/abm/models/mandatory_scheduling.py +++ b/activitysim/abm/models/mandatory_scheduling.py @@ -8,8 +8,9 @@ from activitysim.abm.models.util.tour_scheduling import run_tour_scheduling from activitysim.core import timetable as tt -from activitysim.core import tracing, workflow +from activitysim.core import tracing, workflow, expressions from activitysim.core.util import assign_in_place, reindex +from activitysim.abm.models.util.vectorize_tour_scheduling import TourSchedulingSettings logger = logging.getLogger(__name__) @@ -30,6 +31,12 @@ def mandatory_tour_scheduling( model_name = "mandatory_tour_scheduling" trace_label = model_name + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + f"{model_name}.yaml", + mandatory=False, + ) + mandatory_tours = tours[tours.tour_category == "mandatory"] # - if no mandatory_tours @@ -55,11 +62,12 @@ def mandatory_tour_scheduling( choices = run_tour_scheduling( state, - model_name, + model_settings, mandatory_tours, persons_merged, tdd_alts, tour_segment_col, + trace_label, ) assign_in_place( @@ -86,3 +94,11 @@ def mandatory_tour_scheduling( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/mandatory_tour_frequency.py 
b/activitysim/abm/models/mandatory_tour_frequency.py index 8ab69710f1..f495f1c2b2 100644 --- a/activitysim/abm/models/mandatory_tour_frequency.py +++ b/activitysim/abm/models/mandatory_tour_frequency.py @@ -95,19 +95,6 @@ def mandatory_tour_frequency( add_null_results(state, trace_label, model_settings) return - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = {} - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - estimator = estimation.manager.begin_estimation(state, "mandatory_tour_frequency") model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) @@ -119,6 +106,16 @@ def mandatory_tour_frequency( nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) + # - preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + if estimator: estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) @@ -183,13 +180,6 @@ def mandatory_tour_frequency( # need to reindex as we only handled persons with cdap_activity == 'M' persons["mandatory_tour_frequency"] = choices.reindex(persons.index).fillna("") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) tracing.print_summary( @@ -206,3 +196,11 @@ def mandatory_tour_frequency( state.tracing.trace_df( persons, label="mandatory_tour_frequency.persons", warn_if_empty=True ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git 
a/activitysim/abm/models/non_mandatory_destination.py b/activitysim/abm/models/non_mandatory_destination.py index 496c734cdd..b0c6ea3fb1 100644 --- a/activitysim/abm/models/non_mandatory_destination.py +++ b/activitysim/abm/models/non_mandatory_destination.py @@ -6,8 +6,8 @@ import pandas as pd -from activitysim.abm.models.util import annotate, tour_destination -from activitysim.core import estimation, los, tracing, workflow +from activitysim.abm.models.util import tour_destination +from activitysim.core import estimation, los, tracing, workflow, expressions from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.util import assign_in_place @@ -129,9 +129,6 @@ def non_mandatory_tour_destination( state.add_table("tours", tours) - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - if want_sample_table: assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df) # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True) @@ -146,3 +143,11 @@ def non_mandatory_tour_destination( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/non_mandatory_scheduling.py b/activitysim/abm/models/non_mandatory_scheduling.py index d6670ad86f..bbb09f7f57 100644 --- a/activitysim/abm/models/non_mandatory_scheduling.py +++ b/activitysim/abm/models/non_mandatory_scheduling.py @@ -8,8 +8,9 @@ from activitysim.abm.models.util.tour_scheduling import run_tour_scheduling from activitysim.core import timetable as tt -from activitysim.core import tracing, workflow +from activitysim.core import tracing, workflow, expressions from activitysim.core.util import assign_in_place +from activitysim.abm.models.util.vectorize_tour_scheduling import TourSchedulingSettings logger = logging.getLogger(__name__) 
DUMP = False @@ -29,6 +30,13 @@ def non_mandatory_tour_scheduling( model_name = "non_mandatory_tour_scheduling" trace_label = model_name trace_hh_id = state.settings.trace_hh_id + + model_settings = TourSchedulingSettings.read_settings_file( + state.filesystem, + f"{model_name}.yaml", + mandatory=False, + ) + non_mandatory_tours = tours[tours.tour_category == "non_mandatory"] # - if no mandatory_tours @@ -40,11 +48,12 @@ def non_mandatory_tour_scheduling( choices = run_tour_scheduling( state, - model_name, + model_settings, non_mandatory_tours, persons_merged, tdd_alts, tour_segment_col, + trace_label, ) assign_in_place( @@ -72,11 +81,10 @@ def non_mandatory_tour_scheduling( warn_if_empty=True, ) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) - - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 69b0524d42..175a4b1bfa 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -10,7 +10,6 @@ import numpy as np import pandas as pd -from activitysim.abm.models.util import annotate from activitysim.abm.models.util.overlap import ( person_available_periods, person_max_window, @@ -28,7 +27,7 @@ tracing, workflow, ) -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import PydanticReadable, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_simulate import interaction_simulate @@ 
-166,27 +165,21 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `non_mandatory_tour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - SEGMENT_COL: str = "ptype" # not used anymore TODO remove if needed SPEC_SEGMENTS: list[NonMandatoryTourSpecSegment] = [] # check the above - annotate_persons: PreprocessorSettings | None = None - """Preprocessor settings to annotate persons""" - - annotate_tours: PreprocessorSettings | None = None - """Preprocessor settings to annotate tours""" - explicit_chunk: float = 0 """ If > 0, use this chunk size instead of adaptive chunking. If less than 1, use this fraction of the total number of rows. """ + alts_preprocessor: PreprocessorSettings | None = None + """Settings for the alternatives preprocessor.""" + @workflow.step def non_mandatory_tour_frequency( @@ -233,27 +226,36 @@ def non_mandatory_tour_frequency( choosers = persons_merged choosers = choosers[choosers.cdap_activity.isin(["M", "N"])] - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = { - "person_max_window": lambda x: person_max_window(state, x), - "person_available_periods": lambda persons, start_bin, end_bin, continuous: person_available_periods( - state, persons, start_bin, end_bin, continuous - ), - } - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) - logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers)) - + # preprocessing choosers constants = config.get_model_constants(model_settings) + locals_dict = { + "person_max_window": lambda x: person_max_window(state, x), + "person_available_periods": lambda persons, start_bin, end_bin, continuous: person_available_periods( + state, persons, start_bin, end_bin, continuous + ), + } + 
locals_dict.update(constants) + + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) spec_segments = model_settings.SPEC_SEGMENTS @@ -479,16 +481,6 @@ def non_mandatory_tour_frequency( # need to re-compute tour frequency statistics to account for school escort tours recompute_tour_count_statistics(state) - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.annotate_persons, - trace_label=trace_label, - ) - state.add_table("persons", persons) tracing.print_summary( @@ -513,3 +505,11 @@ def non_mandatory_tour_frequency( label="non_mandatory_tour_frequency.annotated_persons", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index 674b950aaa..995229b69f 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import warnings from pathlib import Path import numpy as np @@ -325,7 +326,14 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): """The school escort model does not use this setting, see `SPECIFICATION`.""" PREPROCESSOR: PreprocessorSettings | None = None - """Setting for the preprocessor.""" + """Setting for the 
preprocessor. + Runs before the choosers are filtered by the CHOOSER_FILTER_COLUMN_NAME. + Deprecated name -- use `preprocessor` instead. + """ + + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives (aka landuse zones) preprocessor. + Runs before the alternatives are filtered by the ALTERNATIVE_FILTER_COLUMN_NAME.""" ALT_DEST_COL_NAME: str = "parking_zone" """Parking destination column name.""" @@ -362,6 +370,19 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): If less than 1, use this fraction of the total number of rows. """ + def __init__(self, **data): + # Handle deprecated PREPROCESSOR + if "PREPROCESSOR" in data: + warnings.warn( + "The 'PREPROCESSOR' setting is deprecated. Please use 'preprocessor' (lowercase) instead.", + DeprecationWarning, + stacklevel=2, + ) + # If both are provided, prefer the lowercase version + if "preprocessor" not in data: + data["preprocessor"] = data["PREPROCESSOR"] + super().__init__(**data) + @workflow.step def parking_location( @@ -388,8 +409,6 @@ def parking_location( trace_hh_id = state.settings.trace_hh_id alt_destination_col_name = model_settings.ALT_DEST_COL_NAME - preprocessor_settings = model_settings.PREPROCESSOR - trips_df = trips trips_merged_df = trips_merged land_use_df = land_use @@ -416,14 +435,28 @@ def parking_location( if constants is not None: locals_dict.update(constants) - if preprocessor_settings: - expressions.assign_columns( - state, - df=trips_merged_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # putting preprocessor and alts preprocessor here so that they are run before + # the filter columns are applied so the user can use the preprocessor to add filter + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=trips_merged_df, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocessing
alternatives + expressions.annotate_preprocessors( + state, + df=land_use_df, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) parking_locations, save_sample_df = run_parking_destination( state, @@ -467,3 +500,11 @@ def parking_location( if state.is_table(sample_table_name): raise RuntimeError("sample table %s already exists" % sample_table_name) state.extend_table(sample_table_name, save_sample_df) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index a0cf6a3312..e4955dc039 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -319,9 +319,6 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): Settings for the `telecommute_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - ALTS: Any NUM_ESCORTEES: int = 3 @@ -353,6 +350,8 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): preprocessor_outbound: PreprocessorSettings | None = None preprocessor_inbound: PreprocessorSettings | None = None preprocessor_outbound_cond: PreprocessorSettings | None = None + alts_preprocessor: PreprocessorSettings | None = None + """Preprocessor settings for the school escorting model alternatives.""" no_escorting_alterative: int = 1 """The alternative number for no escorting. 
Used to set the choice for households with no escortees.""" @@ -428,6 +427,16 @@ def school_escorting( constants = config.get_model_constants(model_settings) locals_dict = {} locals_dict.update(constants) + # alternatives preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) school_escorting_stages = ["outbound", "inbound", "outbound_cond"] escort_bundles = [] @@ -476,15 +485,16 @@ def school_escorting( logger.info("Running %s with %d households", stage_trace_label, len(choosers)) - preprocessor_settings = getattr(model_settings, "preprocessor_" + stage, None) - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=stage_trace_label, - ) + preprocessor_setting_name = "preprocessor_" + stage + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name=preprocessor_setting_name, + ) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) @@ -655,3 +665,11 @@ def school_escorting( ) timetable.replace_table(state) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 2f0253f219..ec1d4961d1 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -134,21 +134,15 @@ def stop_frequency( simulate.set_skim_wrapper_targets(tours_merged, skims) # this should be pre-slice as some expressions may count tours by type - annotations = expressions.compute_columns( + expressions.annotate_preprocessors( state, 
df=tours_merged, - model_settings=preprocessor_settings, locals_dict=locals_dict, + skims=None, # skims are already set on tours_merged above + model_settings=model_settings, trace_label=trace_label, ) - assign_in_place( - tours_merged, - annotations, - state.settings.downcast_int, - state.settings.downcast_float, - ) - tracing.print_summary( "stop_frequency segments", tours_merged.primary_purpose, value_counts=True ) @@ -304,10 +298,6 @@ def stop_frequency( trips, label="stop_frequency.trips", slicer="person_id", columns=None ) - state.tracing.trace_df( - annotations, label="stop_frequency.annotations", columns=None - ) - state.tracing.trace_df( tours_merged, label="stop_frequency.tours_merged", @@ -317,3 +307,11 @@ def stop_frequency( if state.is_table("school_escort_trips"): school_escort_tours_trips.merge_school_escort_trips_into_pipeline(state) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py index f98791a2a2..195335258a 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -61,20 +61,15 @@ def telecommute_frequency( constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + # choosers preprocessor + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = 
state.filesystem.read_model_coefficients(model_settings) @@ -127,3 +122,11 @@ def telecommute_frequency( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_mode_choice.py b/activitysim/abm/models/tour_mode_choice.py index befcb0d7a3..d2052f099a 100644 --- a/activitysim/abm/models/tour_mode_choice.py +++ b/activitysim/abm/models/tour_mode_choice.py @@ -7,9 +7,18 @@ import numpy as np import pandas as pd -from activitysim.abm.models.util import annotate, school_escort_tours_trips, trip +from activitysim.abm.models.util import school_escort_tours_trips, trip from activitysim.abm.models.util.mode import run_tour_mode_choice_simulate -from activitysim.core import config, estimation, logit, los, simulate, tracing, workflow +from activitysim.core import ( + config, + estimation, + logit, + los, + simulate, + tracing, + workflow, + expressions, +) from activitysim.core.configuration.logit import TourModeComponentSettings from activitysim.core.util import assign_in_place, reindex @@ -450,10 +459,6 @@ def tour_mode_choice_simulate( state.add_table("tours", all_tours) - # - annotate tours table - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - if state.settings.trace_hh_id: state.tracing.trace_df( primary_tours, @@ -462,3 +467,11 @@ def tour_mode_choice_simulate( index_label="tour_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_od_choice.py b/activitysim/abm/models/tour_od_choice.py index b518b36f97..ad9f527d75 100644 --- a/activitysim/abm/models/tour_od_choice.py +++ b/activitysim/abm/models/tour_od_choice.py @@ -7,7 +7,7 @@ 
import pandas as pd from activitysim.abm.models.util import tour_od -from activitysim.core import estimation, los, workflow +from activitysim.core import estimation, los, workflow, expressions logger = logging.getLogger(__name__) @@ -147,3 +147,11 @@ def tour_od_choice( columns=None, warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/tour_scheduling_probabilistic.py b/activitysim/abm/models/tour_scheduling_probabilistic.py index 324db45667..ac8ac2632a 100644 --- a/activitysim/abm/models/tour_scheduling_probabilistic.py +++ b/activitysim/abm/models/tour_scheduling_probabilistic.py @@ -175,3 +175,11 @@ def tour_scheduling_probabilistic( assert not tours_df["duration"].isnull().any() state.add_table("tours", tours_df) + + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 50568182e4..19ba883212 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -17,7 +17,6 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate logger = logging.getLogger("activitysim") @@ -27,12 +26,8 @@ class TransitPassOwnershipSettings(LogitComponentSettings, extra="forbid"): Settings for the `transit_pass_ownership` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_households: PreprocessorSettings | None = None - - annotate_persons: PreprocessorSettings | None = None + # no additional fields are required for this component + pass @workflow.step @@ -62,19 +57,14 @@ def transit_pass_ownership( constants = config.get_model_constants(model_settings) # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -121,8 +111,10 @@ def transit_pass_ownership( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index b162230925..328b499929 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -17,7 +17,6 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate 
logger = logging.getLogger("activitysim") @@ -27,13 +26,6 @@ class TransitPassSubsidySettings(LogitComponentSettings, extra="forbid"): Settings for the `transit_pass_subsidy` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_households: PreprocessorSettings | None = None - - annotate_persons: PreprocessorSettings | None = None - CHOOSER_FILTER_COLUMN_NAME: str | None = None """Column name which selects choosers. If None, all persons are choosers.""" @@ -62,20 +54,15 @@ def transit_pass_subsidy( constants = config.get_model_constants(model_settings) - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + # - preprocessor, running before choosers are filtered so column can be created + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) filter_col = model_settings.CHOOSER_FILTER_COLUMN_NAME if filter_col is not None: @@ -129,8 +116,10 @@ def transit_pass_subsidy( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_departure_choice.py b/activitysim/abm/models/trip_departure_choice.py index 236a755dec..0e4dd05d9d 100644 --- a/activitysim/abm/models/trip_departure_choice.py +++ 
b/activitysim/abm/models/trip_departure_choice.py @@ -191,9 +191,21 @@ def choose_tour_leg_pattern( trace_label="trace_label", *, chunk_sizer: chunk.ChunkSizer, - compute_settings: ComputeSettings | None = None, + model_settings: TripDepartureChoiceSettings, ): alternatives = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index() + + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) + have_trace_targets = state.tracing.has_trace_targets(trip_segment) if have_trace_targets: @@ -245,7 +257,7 @@ def choose_tour_leg_pattern( trace_label, trace_rows, estimator=None, - compute_settings=compute_settings, + compute_settings=model_settings.compute_settings, ) interaction_utilities = pd.concat( @@ -402,7 +414,7 @@ def apply_stage_two_model( trips, chunk_size, trace_label: str, - compute_settings: ComputeSettings | None = None, + model_settings: TripDepartureChoiceSettings, ): if not trips.index.is_monotonic_increasing: trips = trips.sort_index() @@ -473,7 +485,7 @@ def apply_stage_two_model( spec, trace_label=segment_trace_label, chunk_sizer=chunk_sizer, - compute_settings=compute_settings, + model_settings=model_settings, ) choices = pd.merge( @@ -509,6 +521,9 @@ class TripDepartureChoiceSettings(PydanticCompute, extra="forbid"): PREPROCESSOR: PreprocessorSettings | None = None """Setting for the preprocessor.""" + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives preprocessor.""" + SPECIFICATION: str = "trip_departure_choice.csv" """Filename for the trip departure choice (.csv) file.""" @@ -580,7 +595,7 @@ def trip_departure_choice( trips_merged_df, state.settings.chunk_size, trace_label, - compute_settings=model_settings.compute_settings, + model_settings=model_settings, ) trips_df = trips @@ -590,3 +605,11 @@ def 
trip_departure_choice( assert trips_df[trips_df["depart"].isnull()].empty state.add_table("trips", trips_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 2b6b5a2ff2..0e7cfb98dd 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -59,9 +59,6 @@ class TripDestinationSettings(LocationComponentSettings, extra="forbid"): PRIMARY_ORIGIN: str = "origin" PRIMARY_DEST: str = "tour_leg_dest" # must be created in preprocessor REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None - preprocessor: PreprocessorSettings | None = None - alts_preprocessor_sample: PreprocessorSettings | None = None - alts_preprocessor_simulate: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" @@ -203,14 +200,16 @@ def _destination_sample( log_alt_losers = state.settings.log_alt_losers - if model_settings.alts_preprocessor_sample: - expressions.assign_columns( - state, - df=alternatives, - model_settings=model_settings.alts_preprocessor_sample, - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "alts"), - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alternatives, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) choices = interaction_sample( state, @@ -951,14 +950,16 @@ def trip_destination_simulate( ) locals_dict.update(skims) - if model_settings.alts_preprocessor_simulate: - expressions.assign_columns( - state, - df=destination_sample, - model_settings=model_settings.alts_preprocessor_simulate, - locals_dict=locals_dict, - 
trace_label=tracing.extend_trace_label(trace_label, "alts"), - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=destination_sample, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) log_alt_losers = state.settings.log_alt_losers destinations = interaction_sample_simulate( @@ -1383,15 +1384,15 @@ def run_trip_destination( } locals_dict.update(model_settings.CONSTANTS) - # - annotate nth_trips - if preprocessor_settings: - expressions.assign_columns( - state, - df=nth_trips, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=nth_trace_label, - ) + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=nth_trips, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if isinstance( nth_trips["trip_period"].dtype, pd.api.types.CategoricalDtype @@ -1677,3 +1678,11 @@ def trip_destination( if state.is_table(sample_table_name): raise RuntimeError("sample table %s already exists" % sample_table_name) state.extend_table(sample_table_name, save_sample_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index b3dd0e7f4f..a942b7af84 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from activitysim.abm.models.util import annotate, school_escort_tours_trips +from activitysim.abm.models.util import school_escort_tours_trips from activitysim.abm.models.util.mode import mode_choice_simulate from activitysim.core import ( chunk, @@ -32,9 +32,6 @@ class TripModeChoiceSettings(TemplatedLogitComponentSettings, extra="forbid"): Settings for 
the `trip_mode_choice` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - MODE_CHOICE_LOGSUM_COLUMN_NAME: str = "mode_choice_logsum" """Column name of the mode choice logsum""" @@ -49,8 +46,11 @@ class TripModeChoiceSettings(TemplatedLogitComponentSettings, extra="forbid"): use_TVPB_constants: bool = True FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True - - annotate_trips: PreprocessorSettings | None = None + """ + If True, overwrite the trip mode of escortee trips to match the mode selected + by the chauffeur. This is useful for school escort tours where the escortee trip + mode (e.g., "transit") should match the chauffeur trip mode. + """ LEGACY_COEFFICIENTS: str | None = None @@ -370,15 +370,6 @@ def trip_mode_choice( state.add_table("trips", trips_df) - if model_settings.annotate_trips: - # need to update locals_dict to access skims that are the same .shape as trips table - locals_dict = {} - locals_dict.update(constants) - simulate.set_skim_wrapper_targets(trips_merged, skims) - locals_dict.update(skims) - locals_dict["timeframe"] = "trip" - annotate.annotate_trips(state, model_settings, trace_label, locals_dict) - if state.settings.trace_hh_id: state.tracing.trace_df( trips_df, @@ -387,3 +378,17 @@ def trip_mode_choice( index_label="trip_id", warn_if_empty=True, ) + + # need to update locals_dict to access skims that are the same .shape as trips table + locals_dict = {} + locals_dict.update(constants) + simulate.set_skim_wrapper_targets(trips_merged, skims) + locals_dict.update(skims) + locals_dict["timeframe"] = "trip" + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index 5f208f514d..616145ebe4 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -253,16 +253,14 @@ def 
run_trip_purpose( trips_df = trips_df[~last_trip] logger.info("assign purpose to %s intermediate trips", trips_df.shape[0]) - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_dict = config.get_model_constants(model_settings) - expressions.assign_columns( - state, - df=trips_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=trips_df, + locals_dict=config.get_model_constants(model_settings), + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) use_depart_time = model_settings.use_depart_time @@ -359,3 +357,11 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: index_label="trip_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_purpose_and_destination.py b/activitysim/abm/models/trip_purpose_and_destination.py index 66443e0892..02b8a6cbfd 100644 --- a/activitysim/abm/models/trip_purpose_and_destination.py +++ b/activitysim/abm/models/trip_purpose_and_destination.py @@ -12,7 +12,7 @@ cleanup_failed_trips, flag_failed_trip_leg_mates, ) -from activitysim.core import estimation, tracing, workflow +from activitysim.core import estimation, tracing, workflow, expressions from activitysim.core.configuration.base import PydanticReadable from activitysim.core.util import assign_in_place @@ -261,3 +261,11 @@ def trip_purpose_and_destination( index_label="trip_id", warn_if_empty=True, ) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_scheduling.py b/activitysim/abm/models/trip_scheduling.py index fa69567ec9..0e45d463dc 100644 --- a/activitysim/abm/models/trip_scheduling.py +++ 
b/activitysim/abm/models/trip_scheduling.py @@ -224,7 +224,6 @@ def schedule_trips_in_leg( failfix = model_settings.FAILFIX depart_alt_base = model_settings.DEPART_ALT_BASE scheduling_mode = model_settings.scheduling_mode - preprocessor_settings = model_settings.preprocessor probs_join_cols = model_settings.probs_join_cols if probs_join_cols is None: @@ -286,14 +285,14 @@ def schedule_trips_in_leg( nth_trace_label = tracing.extend_trace_label(trace_label, "num_%s" % i) # - annotate trips - if preprocessor_settings: - expressions.assign_columns( - state, - df=trips, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=nth_trace_label, - ) + expressions.annotate_preprocessors( + state, + df=trips, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) if ( outbound @@ -682,3 +681,11 @@ def trip_scheduling( assert not trips_df.depart.isnull().any() state.add_table("trips", trips_df) + + expressions.annotate_tables( + state, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/trip_scheduling_choice.py b/activitysim/abm/models/trip_scheduling_choice.py index 5f58e68ee4..510d4ece8d 100644 --- a/activitysim/abm/models/trip_scheduling_choice.py +++ b/activitysim/abm/models/trip_scheduling_choice.py @@ -280,6 +280,17 @@ def run_trip_scheduling_choice( choosers = choosers.sort_index() schedules = generate_schedule_alternatives(choosers).sort_index() + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=schedules, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) + # Assuming we did the max_alt_size calculation correctly, # we should get the same sizes here. 
assert choosers[NUM_ALTERNATIVES].sum() == schedules.shape[0] @@ -340,6 +351,8 @@ class TripSchedulingChoiceSettings(PydanticReadable, extra="forbid"): PREPROCESSOR: PreprocessorSettings | None = None """Setting for the preprocessor.""" + alts_preprocessor: PreprocessorSettings | None = None + """Setting for the alternatives preprocessor.""" SPECIFICATION: str """file name of specification file""" @@ -396,34 +409,32 @@ def trip_scheduling_choice( .reindex(tours.index) ) - preprocessor_settings = model_settings.PREPROCESSOR - # hack: preprocessor adds origin column in place if it does not exist already od_skim_stack_wrapper = skim_dict.wrap("origin", "destination") do_skim_stack_wrapper = skim_dict.wrap("destination", "origin") obib_skim_stack_wrapper = skim_dict.wrap(LAST_OB_STOP, FIRST_IB_STOP) - skims = [od_skim_stack_wrapper, do_skim_stack_wrapper, obib_skim_stack_wrapper] - - locals_dict = { + skims = { "od_skims": od_skim_stack_wrapper, "do_skims": do_skim_stack_wrapper, "obib_skims": obib_skim_stack_wrapper, + } + locals_dict = { "orig_col_name": "origin", "dest_col_name": "destination", "timeframe": "timeless_directional", } + locals_dict.update(skims) - if preprocessor_settings: - simulate.set_skim_wrapper_targets(tours_df, skims) - - expressions.assign_columns( - state, - df=tours_df, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=tours_df, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) tours_df = run_trip_scheduling_choice( state, @@ -436,3 +447,11 @@ def trip_scheduling_choice( ) state.add_table("tours", tours_df) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py deleted file mode 
100644 index 5c0f628219..0000000000 --- a/activitysim/abm/models/util/annotate.py +++ /dev/null @@ -1,136 +0,0 @@ -# ActivitySim -# See full license in LICENSE.txt. -from __future__ import annotations - -import logging - -import pandas as pd - -from activitysim.core import expressions, tracing, workflow -from activitysim.core.configuration import PydanticBase - -""" -Code for annotating tables -""" - -logger = logging.getLogger(__name__) - - -def annotate_households( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the households table in the pipeline according to spec. - - Parameters - ---------- - model_settings : dict - trace_label : str - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - households = state.get_dataframe("households") - expressions.assign_columns( - state, - df=households, - model_settings=model_settings.get("annotate_households"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_households"), - ) - state.add_table("households", households) - - -def annotate_persons( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the persons table in the pipeline according to spec. 
- - Parameters - ---------- - model_settings : dict - trace_label : str - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - persons = state.get_dataframe("persons") - expressions.assign_columns( - state, - df=persons, - model_settings=model_settings.get("annotate_persons"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), - ) - state.add_table("persons", persons) - - -def annotate_tours( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict: dict | None = None, -): - """ - Add columns to the tours table in the pipeline according to spec. - - Parameters - ---------- - state : workflow.State - model_settings : dict or PydanticBase - trace_label : str - locals_dict : dict, optional - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - tours = state.get_dataframe("tours") - expressions.assign_columns( - state, - df=tours, - model_settings=model_settings.get("annotate_tours"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_tours"), - ) - state.add_table("tours", tours) - - -def annotate_trips( - state: workflow.State, - model_settings: dict | PydanticBase, - trace_label: str, - locals_dict=None, -): - """ - Add columns to the trips table in the pipeline according to spec. 
- - Parameters - ---------- - state : workflow.State - model_settings : dict or PydanticBase - trace_label : str - locals_dict : dict, optional - """ - if isinstance(model_settings, PydanticBase): - model_settings = model_settings.dict() - if locals_dict is None: - locals_dict = {} - trips = state.get_dataframe("trips") - expressions.assign_columns( - state, - df=trips, - model_settings=model_settings.get("annotate_trips"), - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(trace_label, "annotate_trips"), - ) - state.add_table("trips", trips) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index 1770fddf5d..7234355fde 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -15,8 +15,6 @@ from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex -import annotate - logger = logging.getLogger(__name__) DUMP = False @@ -111,6 +109,27 @@ def _destination_sample( log_alt_losers = state.settings.log_alt_losers + # preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=destination_size_terms, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + choices = interaction_sample( state, choosers, @@ -763,15 +782,26 @@ def run_destination_simulate( if constants is not None: locals_d.update(constants) - preprocessor_settings = model_settings.get('preprocessor', None) - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + # 
preprocess choosers table + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) + + # preprocess alternatives table + expressions.annotate_preprocessors( + state, + df=destination_sample, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") @@ -801,15 +831,6 @@ def run_destination_simulate( assert isinstance(choices, pd.Series) choices = choices.to_frame("choice") - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) - - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - return choices diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index 93c624b135..3c64f4bcc2 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -643,8 +643,7 @@ class JointTourFreqCompSettings(LogitComponentSettings, extra="forbid"): """ ALTS_TABLE_STRUCTURE: JointTourFreqCompAlts = JointTourFreqCompAlts() - preprocessor: PreprocessorSettings | None = None - ALTS_PREPROCESSOR: PreprocessorSettings | None = None + ALTS_PREPROCESSOR: PreprocessorSettings | list[PreprocessorSettings] | None = None def create_joint_tours( diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index 7c615142f4..96ec9aba86 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -47,7 +47,6 @@ class TourODSettings(TourLocationComponentSettings): ORIGIN_ATTR_COLS_TO_USE: list[str] = [] ORIG_COL_NAME: str ORIG_FILTER: str | None = None - 
preprocessor: PreprocessorSettings | None = None def get_od_id_col(origin_col, destination_col): @@ -203,6 +202,17 @@ def _od_sample( elif skims.orig_key not in od_alts_df: logger.error("Alts df is missing origin skim key column.") + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=od_alts_df, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_sample", + ) + choices = interaction_sample( state, choosers, @@ -658,54 +668,6 @@ def od_presample( return maz_choices -# class SizeTermCalculatorOD: # class SizeTermCalculator -# """ -# convenience object to provide size_terms for a selector (e.g. -# non_mandatory) for various segments (e.g. tour_type or purpose) -# returns size terms for specified segment in df or series form. -# """ -# -# def __init__(self, size_term_selector): -# # do this once so they can request size_terms for various segments (tour_type or purpose) -# land_use = state.checkpoint.load_dataframe("land_use") -# self.land_use = land_use -# size_terms = state.get_injectable("size_terms") -# self.destination_size_terms = tour_destination_size_terms( -# self.land_use, size_terms, size_term_selector -# ) -# -# assert not self.destination_size_terms.isna().any(axis=None) -# -# def omnibus_size_terms_df(self): -# return self.destination_size_terms -# -# def dest_size_terms_df(self, segment_name, trace_label): -# # return size terms as df with one column named 'size_term' -# # convenient if creating or merging with alts -# -# size_terms = self.destination_size_terms[[segment_name]].copy() -# size_terms.columns = ["size_term"] -# -# # FIXME - no point in considering impossible alternatives (where dest size term is zero) -# logger.debug( -# f"SizeTermCalculator dropping {(~(size_terms.size_term > 0)).sum()} " -# f"of {len(size_terms)} rows where size_term is zero for {segment_name}" -# ) -# size_terms = size_terms[size_terms.size_term > 0] 
-# -# if len(size_terms) == 0: -# logger.warning( -# f"SizeTermCalculator: no zones with non-zero size terms for {segment_name} in {trace_label}" -# ) -# -# return size_terms -# -# def dest_size_terms_series(self, segment_name): -# # return size terms as as series -# # convenient (and no copy overhead) if reindexing and assigning into alts column -# return self.destination_size_terms[segment_name] - - def run_od_sample( state, spec_segment_name, @@ -1044,6 +1006,17 @@ def run_od_simulate( if constants is not None: locals_d.update(constants) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=od_sample, + locals_dict=locals_d, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor_simulate", + ) + state.tracing.dump_df(DUMP, choosers, trace_label, "choosers") choices = interaction_sample_simulate( state, @@ -1085,7 +1058,6 @@ def run_tour_od( trace_label, ): size_term_calculator = SizeTermCalculator(state, model_settings.SIZE_TERM_SELECTOR) - preprocessor_settings = model_settings.preprocessor origin_col_name = model_settings.ORIG_COL_NAME chooser_segment_column = model_settings.CHOOSER_SEGMENT_COLUMN_NAME @@ -1108,15 +1080,15 @@ def run_tour_od( right_index=True, ) - # - annotate choosers - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - trace_label=trace_label, - ) - + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict={}, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) # size_term segment is segment_name segment_destination_size_terms = size_term_calculator.dest_size_terms_df( segment_name, trace_label diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index e33b3cedbb..e80fc4e687 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ 
b/activitysim/abm/models/util/tour_scheduling.py @@ -11,27 +11,18 @@ from .vectorize_tour_scheduling import TourModeComponentSettings, TourSchedulingSettings -import annotate - logger = logging.getLogger(__name__) def run_tour_scheduling( state: workflow.State, - model_name: str, + model_settings: TourSchedulingSettings, chooser_tours: pd.DataFrame, persons_merged: pd.DataFrame, tdd_alts: pd.DataFrame, tour_segment_col: str, + trace_label: str, ): - trace_label = model_name - model_settings_file_name = f"{model_name}.yaml" - - model_settings = TourSchedulingSettings.read_settings_file( - state.filesystem, - model_settings_file_name, - mandatory=False, - ) if model_settings.LOGSUM_SETTINGS: logsum_settings = TourModeComponentSettings.read_settings_file( @@ -54,18 +45,19 @@ def run_tour_scheduling( timetable = state.get_injectable("timetable") # - run preprocessor to annotate choosers - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {"tt": timetable.attach_state(state)} - locals_d.update(config.get_model_constants(model_settings)) - - expressions.assign_columns( - state, - df=chooser_tours, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + locals_d = {"tt": timetable.attach_state(state)} + locals_d.update(config.get_model_constants(model_settings)) + + # preprocess choosers + expressions.annotate_preprocessors( + state, + df=chooser_tours, + locals_dict=locals_d, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) + # alts preprocessed in vectorize_tour_scheduling estimators = {} if model_settings.TOUR_SPEC_SEGMENTS: @@ -74,7 +66,7 @@ def run_tour_scheduling( specs = {} compute_settings = {} for spec_segment_name, spec_settings in spec_segment_settings.items(): - bundle_name = f"{model_name}_{spec_segment_name}" + bundle_name = f"{trace_label}_{spec_segment_name}" # estimator for this tour_segment estimator = estimation.manager.begin_estimation( @@ 
-93,7 +85,7 @@ def run_tour_scheduling( if estimator: estimators[spec_segment_name] = estimator # add to local list - estimator.write_model_settings(model_settings, model_settings_file_name) + estimator.write_model_settings(model_settings, f"{trace_label}.yaml") estimator.write_spec(spec_settings) estimator.write_coefficients(coefficients_df, spec_settings) @@ -148,7 +140,7 @@ def run_tour_scheduling( if estimators: timetable.begin_transaction(list(estimators.values())) - logger.info(f"Running {model_name} with %d tours", len(chooser_tours)) + logger.info(f"Running {trace_label} with %d tours", len(chooser_tours)) choices = vts.vectorize_tour_scheduling( state, chooser_tours, @@ -197,13 +189,4 @@ def run_tour_scheduling( choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left" ) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) - - if model_settings.annotate_tours: - annotate.annotate_tours(state, model_settings, trace_label) - return choices diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index dfab8171de..d4593c21fa 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -43,8 +43,6 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"): give the segements. 
""" SIMULATE_CHOOSER_COLUMNS: list[str] | None = None - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" SPEC_SEGMENTS: dict[str, LogitComponentSettings] = {} diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index a84dfaabf9..d4efca1102 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -90,20 +90,15 @@ class VehicleAllocationSettings(LogitComponentSettings, extra="forbid"): Settings for the `vehicle_allocation` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - OCCUPANCY_LEVELS: list = [1] # TODO Check this + OCCUPANCY_LEVELS: list = [1, 2, 3.5] """Occupancy level It will create columns in the tour table selecting a vehicle for each of the occupancy levels. They are named vehicle_occup_1, vehicle_occup_2,... etc. - if not supplied, will default to only one occupancy level of 1 + if not supplied, will default to only one occupancy level of 1, 2, and 3.5 + representing sov, hov2, and hov3+ respectively. 
""" - annotate_tours: PreprocessorSettings | None = None - """Preprocessor settings to annotate tours""" - @workflow.step def vehicle_allocation( @@ -212,15 +207,14 @@ def vehicle_allocation( locals_dict.update(skims) # ------ preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) logger.info("Running %s with %d tours", trace_label, len(choosers)) @@ -286,9 +280,13 @@ def vehicle_allocation( "vehicle_allocation", tours[tours_veh_occup_cols], value_counts=True ) - annotate_settings = model_settings.annotate_tours - if annotate_settings: - annotate_vehicle_allocation(state, model_settings, trace_label) - if state.settings.trace_hh_id: state.tracing.trace_df(tours, label="vehicle_allocation", warn_if_empty=True) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=skims, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 93caae0381..5347b5bb79 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -407,15 +407,16 @@ def iterate_vehicle_type_choice( ) # alts preprocessor - alts_preprocessor_settings = model_settings.alts_preprocessor - if alts_preprocessor_settings: - expressions.assign_columns( - state, - df=alts_wide, - model_settings=alts_preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocessing alternatives + expressions.annotate_preprocessors( + state, + df=alts_wide, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + 
trace_label=trace_label, + preprocessor_setting_name="alts_preprocessor", + ) # - preparing choosers for iterating vehicles_merged["already_owned_veh"] = "" @@ -434,15 +435,15 @@ def iterate_vehicle_type_choice( # running preprocessor on entire vehicle table to enumerate vehicle types # already owned by the household choosers = vehicles_merged - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_dict, - trace_label=trace_label, - ) + # preprocessing choosers + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) # only make choices for vehicles that have not been selected yet choosers = choosers[choosers["vehicle_num"] == veh_num] @@ -564,7 +565,6 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): VEHICLE_TYPE_DATA_FILE: str | None = None PROBS_SPEC: str | None = None combinatorial_alts: dict | None = None - preprocessor: PreprocessorSettings | None = None alts_preprocessor: PreprocessorSettings | None = None SIMULATION_TYPE: Literal[ "simple_simulate", "interaction_simulate" @@ -576,10 +576,6 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): COLS_TO_INCLUDE_IN_ALTS_TABLE: list[str] = [] """Columns to include in the alternatives table for use in utility calculations.""" - annotate_households: PreprocessorSettings | None = None - annotate_persons: PreprocessorSettings | None = None - annotate_vehicles: PreprocessorSettings | None = None - REQUIRE_DATA_FOR_ALL_ALTS: bool = False WRITE_OUT_ALTS_FILE: bool = False @@ -708,14 +704,6 @@ def vehicle_type_choice( vehicles = pd.concat([vehicles, choices], axis=1) state.add_table("vehicles", vehicles) - # - annotate tables - if model_settings.annotate_households: - annotate_vehicle_type_choice_households(state, 
model_settings, trace_label) - if model_settings.annotate_persons: - annotate_vehicle_type_choice_persons(state, model_settings, trace_label) - if model_settings.annotate_vehicles: - annotate_vehicle_type_choice_vehicles(state, model_settings, trace_label) - tracing.print_summary( "vehicle_type_choice", vehicles.vehicle_type, value_counts=True ) @@ -724,3 +712,11 @@ def vehicle_type_choice( state.tracing.trace_df( vehicles, label="vehicle_type_choice", warn_if_empty=True ) + + expressions.annotate_tables( + state, + locals_dict=locals_dict, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 73a864c825..6b8f8d7815 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -18,7 +18,6 @@ from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings -from .util import annotate logger = logging.getLogger("activitysim") @@ -28,13 +27,6 @@ class WorkFromHomeSettings(LogitComponentSettings, extra="forbid"): Settings for the `work_from_home` component. 
""" - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_households: PreprocessorSettings | None = None - - annotate_persons: PreprocessorSettings | None = None - WORK_FROM_HOME_ALT: int """Value that specify if the person is working from home""" # TODO @@ -94,20 +86,14 @@ def work_from_home( constants = config.get_model_constants(model_settings) work_from_home_alt = model_settings.WORK_FROM_HOME_ALT - # - preprocessor - preprocessor_settings = model_settings.preprocessor - if preprocessor_settings: - locals_d = {} - if constants is not None: - locals_d.update(constants) - - expressions.assign_columns( - state, - df=choosers, - model_settings=preprocessor_settings, - locals_dict=locals_d, - trace_label=trace_label, - ) + expressions.annotate_preprocessors( + state, + df=choosers, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) @@ -228,8 +214,10 @@ def work_from_home( if state.settings.trace_hh_id: state.tracing.trace_df(persons, label=trace_label, warn_if_empty=True) - if model_settings.annotate_households: - annotate.annotate_households(state, model_settings, trace_label) - - if model_settings.annotate_persons: - annotate.annotate_persons(state, model_settings, trace_label) \ No newline at end of file + expressions.annotate_tables( + state, + locals_dict=constants, + skims=None, + model_settings=model_settings, + trace_label=trace_label, + ) diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index d03bcab778..452d645970 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -77,6 +77,27 @@ class BaseLogitComponentSettings(PydanticCompute): CONSTANTS: dict[str, Any] = {} """Named constants usable in the utility 
expressions.""" + preprocessor: PreprocessorSettings | list[PreprocessorSettings] | None = None + """Chooser preprocessor settings. + + This is a set of expressions to be evaluated on the choosers + before the logit model is run. It is used to prepare the choosers + for the logit model by adding columns that are used in the + utility expressions. + """ + + annotate_households: PreprocessorSettings | None = None + annotate_persons: PreprocessorSettings | None = None + annotate_tours: PreprocessorSettings | None = None + annotate_trips: PreprocessorSettings | None = None + annotate_vehicles: PreprocessorSettings | None = None + """Annotate output tables with additional columns. + + These settings are used to add additional columns to the output tables + after the logit model is run. They are typically used to add + additional attributes that are derived from the model results. + """ + # sharrow_skip is deprecated in factor of compute_settings.sharrow_skip @model_validator(mode="before") @classmethod @@ -196,6 +217,11 @@ class LocationComponentSettings(BaseLogitComponentSettings): If less than 1, use this fraction of the total number of rows. 
""" + alts_preprocessor_sample: PreprocessorSettings | None = None + """Alternatives preprocessor settings to use when sampling alternatives.""" + alts_preprocessor_simulate: PreprocessorSettings | None = None + """Alternatives preprocessor settings to use when simulating choices.""" + class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): # Logsum-related settings @@ -207,7 +233,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SEGMENTS: list[str] | None = None SIZE_TERM_SELECTOR: str | None = None - annotate_tours: PreprocessorSettings | None = None CHOOSER_FILTER_COLUMN_NAME: str | None = None DEST_CHOICE_COLUMN_NAME: str | None = None @@ -221,8 +246,6 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SEGMENT_IDS: dict[str, int] | dict[str, str] | dict[str, bool] | None = None SHADOW_PRICE_TABLE: str | None = None MODELED_SIZE_TABLE: str | None = None - annotate_persons: PreprocessorSettings | None = None - annotate_households: PreprocessorSettings | None = None SIMULATE_CHOOSER_COLUMNS: list[str] | None = None ALT_DEST_COL_NAME: str LOGSUM_TOUR_PURPOSE: str | dict[str, str] | None = None @@ -249,8 +272,6 @@ class TourModeComponentSettings(TemplatedLogitComponentSettings, extra="forbid") COMPUTE_TRIP_MODE_CHOICE_LOGSUMS: bool = False tvpb_mode_path_types: dict[str, Any] | None = None FORCE_ESCORTEE_CHAUFFEUR_MODE_MATCH: bool = True - annotate_tours: PreprocessorSettings | None = None - preprocessor: PreprocessorSettings | list[PreprocessorSettings] | None = None nontour_preprocessor: PreprocessorSettings | list[ PreprocessorSettings ] | None = None diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index 413636d3f4..028c987da3 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -6,7 +6,7 @@ import pandas as pd -from activitysim.core import assign, simulate, tracing, workflow +from activitysim.core import 
config, assign, simulate, tracing, workflow from activitysim.core.configuration.base import PreprocessorSettings, PydanticBase from activitysim.core.util import ( assign_in_place, @@ -184,26 +184,65 @@ def assign_columns( def annotate_preprocessors( state: workflow.State, df: pd.DataFrame, - locals_dict, - skims, + locals_dict: dict, + skims: dict | None, model_settings: PydanticBase | dict, trace_label: str, + preprocessor_setting_name: str = "preprocessor", ): - locals_d = {} - locals_d.update(locals_dict) - locals_d.update(skims) + """ + Look through the preprocessor settings and apply the calculations to the dataframe. + This is generally called before the main model calculations to prepare the data. + + Parameters + ---------- + state : workflow.State + The current state of the workflow. + df : pd.DataFrame + DataFrame to which the preprocessor settings will be applied. + locals_dict : dict + Dictionary of local variables to be used in the expressions. + skims : dict | None + Dictionary of skims to be used in the expressions. + model_settings : PydanticBase | dict + Model settings containing the preprocessor settings. + trace_label : str + Label for tracing the operations. + preprocessor_setting_name : str + Name of the preprocessor settings key in the model settings. 
+ + Returns + ------- + None -- dataframe is modified in place + + """ + if isinstance(model_settings, PydanticBase): + preprocessor_settings = getattr(model_settings, preprocessor_setting_name, []) + elif isinstance(model_settings, dict): + preprocessor_settings = model_settings.get(preprocessor_setting_name, []) + else: + raise ValueError( + f"Expected model_settings to be PydanticBase or dict, got {type(model_settings)}" + ) + + if not preprocessor_settings or preprocessor_settings == []: + return - try: - preprocessor_settings = model_settings.preprocessor - except AttributeError: - preprocessor_settings = model_settings.get("preprocessor", []) - if preprocessor_settings is None: - preprocessor_settings = [] if not isinstance(preprocessor_settings, list): assert isinstance(preprocessor_settings, dict | PreprocessorSettings) preprocessor_settings = [preprocessor_settings] - simulate.set_skim_wrapper_targets(df, skims) + locals_d = {} + locals_d.update(locals_dict) + if skims: + try: + simulate.set_skim_wrapper_targets(df, skims) + locals_d.update(skims) + except AssertionError as e: + logger.warning( + "Failed to set skim wrapper targets: %s. Skims wrappers may not be used in expressions.", + e, + ) for preproc_settings in preprocessor_settings: results = compute_columns( @@ -211,7 +250,9 @@ def annotate_preprocessors( df=df, model_settings=preproc_settings, locals_dict=locals_d, - trace_label=trace_label, + trace_label=tracing.extend_trace_label( + trace_label, preprocessor_setting_name + ), ) assign_in_place( @@ -219,6 +260,77 @@ def annotate_preprocessors( ) +def annotate_tables( + state: workflow.State, + model_settings: PydanticBase | dict, + trace_label: str, + skims: dict | None = None, + locals_dict: dict | None = None, +): + """ + Look through the annotate settings and apply the calculations to the tables. + This is generally called after the main model calculations to add data to output tables. 
+ + Parameters + ---------- + state : workflow.State + The current state of the workflow. + model_settings : PydanticBase | dict + Model settings containing the annotation settings for various tables. + trace_label : str + Label for tracing the operations. + skims : dict | None + Dictionary of skims to be used in the expressions, if applicable. + locals_dict : dict | None + Dictionary of local variables to be used in the expressions, if applicable. + + Returns + ------- + None -- tables are modified in place + """ + + # process tables in least to most aggregated order + tables = ["trips", "tours", "vehicles", "persons", "households"] + + for table_name in tables: + annotate_settings = getattr(model_settings, f"annotate_{table_name}", None) + if annotate_settings is None: + continue + assert isinstance( + annotate_settings, (dict, PreprocessorSettings) + ), f"Expected annotate_{table_name} to be dict or PreprocessorSettings, got {type(annotate_settings)}" + + df = state.get_dataframe(table_name) + + locals_d = {} + if skims: + try: + simulate.set_skim_wrapper_targets(df, skims) + locals_d.update(skims) + except AssertionError as e: + logger.warning( + "Failed to set skim wrapper targets: %s. 
Skims wrappers may not be used in expressions.", + e, + ) + if locals_dict: + locals_d.update(locals_dict) + + results = compute_columns( + state, + df=df, + model_settings=annotate_settings, + locals_dict=locals_d, + trace_label=tracing.extend_trace_label(trace_label, "annotate_persons"), + ) + + assign_in_place( + df, results, state.settings.downcast_int, state.settings.downcast_float + ) + + # write table with new columns back to state + state.add_table(table_name, df) + + def filter_chooser_columns(choosers, chooser_columns): missing_columns = [c for c in chooser_columns if c not in choosers] if missing_columns: diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 0db1e4dde5..cbb6f2da2c 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -704,9 +704,9 @@ def drop_unused_columns( custom_chooser_lines = inspect.getsource(custom_chooser) unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines)) - logger.info("Dropping unused variables in chooser table") + logger.debug("Dropping unused variables in chooser table") - logger.info( + logger.debug( "before dropping, the choosers table has {} columns: {}".format( len(choosers.columns), choosers.columns ) @@ -715,7 +715,7 @@ def drop_unused_columns( # keep only variables needed for spec choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]] - logger.info( + logger.debug( "after dropping, the choosers table has {} columns: {}".format( len(choosers.columns), choosers.columns ) From 52c93b3e09b564bd4638b83a79f16e5531174153 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 17 Jun 2025 07:38:58 -0700 Subject: [PATCH 4/9] fixing bugs in jtf and trip purpose --- activitysim/abm/models/joint_tour_frequency.py | 1 + activitysim/abm/models/trip_purpose.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py index 
7db1087d21..1b9fde94ba 100644 --- a/activitysim/abm/models/joint_tour_frequency.py +++ b/activitysim/abm/models/joint_tour_frequency.py @@ -83,6 +83,7 @@ def joint_tour_frequency( # - preprocess choosers table locals_dict = { + "persons": persons, "hh_time_window_overlap": lambda *x: hh_time_window_overlap(state, *x), } locals_dict.update(constants) diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py index 616145ebe4..695882938d 100644 --- a/activitysim/abm/models/trip_purpose.py +++ b/activitysim/abm/models/trip_purpose.py @@ -301,6 +301,10 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: """ trace_label = "trip_purpose" + model_settings = TripPurposeSettings.read_settings_file( + state.filesystem, "trip_purpose.yaml" + ) + trips_df = trips if state.is_table("school_escort_trips"): @@ -324,6 +328,7 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: state, trips_df, estimator, + model_settings, trace_label=trace_label, ) @@ -360,7 +365,7 @@ def trip_purpose(state: workflow.State, trips: pd.DataFrame) -> None: expressions.annotate_tables( state, - locals_dict=locals_dict, + locals_dict={}, skims=None, model_settings=model_settings, trace_label=trace_label, From d709d36c5b2c69a7def01766a31e489ab8c8d3e3 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 17 Jun 2025 07:54:06 -0700 Subject: [PATCH 5/9] adding persons back in to locals_d in jtc --- activitysim/abm/models/joint_tour_composition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index 60de227aa1..1c620e0709 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ b/activitysim/abm/models/joint_tour_composition.py @@ -89,6 +89,7 @@ def joint_tour_composition( constants = config.get_model_constants(model_settings) locals_dict = { + "persons": persons, "hh_time_window_overlap": lambda *x: 
hh_time_window_overlap(state, *x), } locals_dict.update(constants) From 353d89cc71b6530557355d23f922d2fb90774761 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 17 Jun 2025 08:10:36 -0700 Subject: [PATCH 6/9] model name missing in tour scheduling --- activitysim/abm/models/util/tour_scheduling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index e80fc4e687..2b9fbf2e78 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -115,7 +115,7 @@ def run_tour_scheduling( assert not model_settings.TOUR_SPEC_SEGMENTS assert tour_segment_col is None - estimator = estimation.manager.begin_estimation(state, model_name) + estimator = estimation.manager.begin_estimation(state, trace_label) spec_file_name = model_settings.SPEC model_spec = state.filesystem.read_model_spec(file_name=spec_file_name) From 2b71451323bc5ef816959c7f005411726799e483 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 17 Jun 2025 09:35:20 -0700 Subject: [PATCH 7/9] missing expressions import in tour sched prob --- activitysim/abm/models/tour_scheduling_probabilistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/tour_scheduling_probabilistic.py b/activitysim/abm/models/tour_scheduling_probabilistic.py index ac8ac2632a..8dcb1bbff2 100644 --- a/activitysim/abm/models/tour_scheduling_probabilistic.py +++ b/activitysim/abm/models/tour_scheduling_probabilistic.py @@ -7,7 +7,7 @@ import pandas as pd from activitysim.abm.models.util import probabilistic_scheduling as ps -from activitysim.core import chunk, estimation, workflow +from activitysim.core import chunk, estimation, workflow, expressions from activitysim.core.configuration.base import PydanticReadable logger = logging.getLogger(__name__) @@ -178,7 +178,7 @@ def tour_scheduling_probabilistic( expressions.annotate_tables( 
state, - locals_dict=constants, + locals_dict={}, skims=None, model_settings=model_settings, trace_label=trace_label, From 8179e26ea8dda1b44d4bf9067cbe5e565da9e938 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Wed, 18 Jun 2025 15:05:29 -0700 Subject: [PATCH 8/9] ci unit tests & fixing estimation test error --- .../abm/models/util/tour_scheduling.py | 2 +- activitysim/core/expressions.py | 10 +- .../core/test/configs/preprocessor.csv | 8 + activitysim/core/test/test_preprocessing.py | 197 ++++++++++++++++++ 4 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 activitysim/core/test/configs/preprocessor.csv create mode 100644 activitysim/core/test/test_preprocessing.py diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index 2b9fbf2e78..0a7c6675d1 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -126,7 +126,7 @@ def run_tour_scheduling( if estimator: estimators[None] = estimator # add to local list - estimator.write_model_settings(model_settings, model_settings_file_name) + estimator.write_model_settings(model_settings, f"{trace_label}.yaml") estimator.write_spec(model_settings) estimator.write_coefficients(coefficients_df, model_settings) diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index 028c987da3..e6ef0b8e69 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -293,7 +293,15 @@ def annotate_tables( tables = ["trips", "tours", "vehicles", "persons", "households"] for table_name in tables: - annotate_settings = getattr(model_settings, f"annotate_{table_name}", None) + if isinstance(model_settings, PydanticBase): + annotate_settings = getattr(model_settings, f"annotate_{table_name}", None) + elif isinstance(model_settings, dict): + annotate_settings = model_settings.get(f"annotate_{table_name}", None) + else: + raise ValueError( + f"Expected model_settings 
to be PydanticBase or dict, got {type(model_settings)}" + ) + if annotate_settings is None: continue assert isinstance( diff --git a/activitysim/core/test/configs/preprocessor.csv b/activitysim/core/test/configs/preprocessor.csv new file mode 100644 index 0000000000..e2f5c29b97 --- /dev/null +++ b/activitysim/core/test/configs/preprocessor.csv @@ -0,0 +1,8 @@ +Description,Target,Expression +income from households table,_hh_income,"reindex(households.income, df.household_id)" +income test,is_high_income,_hh_income > 50000 +count persons test,num_persons,persons.groupby('household_id').size().reindex(df.household_id) +skim dict test,od_distance,"skim_dict.lookup(df.origin, df.destination, 'DIST')" +skim wrapper test,od_distance_wrapper,skims2d['DIST'] +sov time,od_sov_time,skims3d['SOV_TIME'] +testing constant from locals_dict,constant_test,test_constant / 2 \ No newline at end of file diff --git a/activitysim/core/test/test_preprocessing.py b/activitysim/core/test/test_preprocessing.py new file mode 100644 index 0000000000..06c7785603 --- /dev/null +++ b/activitysim/core/test/test_preprocessing.py @@ -0,0 +1,197 @@ +# ActivitySim +# See full license in LICENSE.txt. 
+from __future__ import annotations + +import logging +import logging.config +import os.path + +import numpy as np +import pandas as pd +import pytest + +from activitysim.core import workflow, expressions, los +from activitysim.core.configuration.base import PreprocessorSettings + + +def add_canonical_dirs(configs_dir_name): + state = workflow.State() + los_configs_dir = os.path.join(os.path.dirname(__file__), f"los/{configs_dir_name}") + configs_dir = os.path.join(os.path.dirname(__file__), "configs") + data_dir = os.path.join(os.path.dirname(__file__), f"los/data") + output_dir = os.path.join(os.path.dirname(__file__), "output") + state.initialize_filesystem( + working_dir=os.path.dirname(__file__), + configs_dir=(los_configs_dir, configs_dir), + output_dir=output_dir, + data_dir=(data_dir,), + ) + return state + + +@pytest.fixture +def state() -> workflow.State: + state = add_canonical_dirs("configs_1z").load_settings() + network_los = los.Network_LOS(state) + network_los.load_data() + state.set("skim_dict", network_los.get_default_skim_dict()) + return state + + +@pytest.fixture(scope="module") +def households(): + return pd.DataFrame( + { + "household_id": [1, 2, 3], + "home_zone_id": [1, 2, 3], + "income": [50000, 60000, 70000], + } + ).set_index("household_id") + + +@pytest.fixture(scope="module") +def persons(): + return pd.DataFrame( + { + "person_id": [1, 2, 3, 4, 5], + "household_id": [1, 1, 2, 2, 3], + "age": [25, 30, 22, 28, 35], + } + ).set_index("person_id") + + +@pytest.fixture(scope="module") +def tours(): + return pd.DataFrame( + { + "tour_id": [1, 2, 3], + "household_id": [1, 2, 3], + "person_id": [1, 2, 3], + "tour_type": ["work", "shopping", "othmaint"], + "origin": [1, 2, 3], + "destination": [2, 3, 1], + "period": ["AM", "PM", "AM"], + } + ).set_index("tour_id") + + +def check_outputs(tours): + """ + Check that the tours DataFrame has the expected new columns and values + according to the preprocessor / annotator expressions. 
+ """ + new_cols = [ + "is_high_income", + "num_persons", + "od_distance", + "od_distance_wrapper", + "od_sov_time", + "constant_test", + ] + + # check all new columns are added + assert all( + col in tours.columns for col in new_cols + ), f"Missing columns: {set(new_cols) - set(tours.columns)}" + + # column with _ shouldn't be in the columns + assert ( + "_hh_income" not in tours.columns + ), f"Unexpected column found: _hh_income in {tours.columns}" + + # check the values in the new columns + exppected_output = pd.DataFrame( + { + "tour_id": [1, 2, 3], + "is_high_income": [False, True, True], + "num_persons": [2, 2, 1], + "od_distance": [0.24, 0.28, 0.57], + "od_distance_wrapper": [0.24, 0.28, 0.57], + "od_sov_time": [0.78, 0.89, 1.76], + "constant_test": [21, 21, 21], + } + ).set_index("tour_id") + pd.testing.assert_frame_equal(tours[new_cols], exppected_output, check_dtype=False) + + +def setup_skims(state: workflow.State): + """Creates a set of skim wrappers to test in expressions.""" + skim_dict = state.get("skim_dict") + skims3d = skim_dict.wrap_3d( + orig_key="origin", dest_key="destination", dim3_key="period" + ) + skims2d = skim_dict.wrap("origin", "destination") + return {"skims3d": skims3d, "skims2d": skims2d} + + +def test_preprocessor(state: workflow.State, households, persons, tours): + # adding dataframes to state so they can be accessed in preprocessor + state.add_table("households", households) + state.add_table("persons", persons) + original_tours = tours.copy() + state.add_table("tours", original_tours) + + # defining preprocessor + preprocessor_settings = PreprocessorSettings( + SPEC="preprocessor.csv", + DF="tours", + TABLES=["persons", "households"], + ) + model_settings = {"preprocessor": preprocessor_settings} + + # annotating preprocessors + expressions.annotate_preprocessors( + state, + df=tours, + locals_dict={"test_constant": 42}, + skims=setup_skims(state), + model_settings=model_settings, + trace_label="ci_test_preprocessor", + ) + + 
check_outputs(tours) + + state_tours = state.get_table("tours") + # check that the state table is not modified + pd.testing.assert_frame_equal(state_tours, original_tours) + + +def test_annotator(state, households, persons, tours): + # adding dataframes to state so they can be accessed in annotator + state.add_table("households", households) + state.add_table("persons", persons) + original_tours = tours.copy() + state.add_table("tours", original_tours) + + # defining annotator + annotator_settings = PreprocessorSettings( + SPEC="preprocessor.csv", + DF="tours", + TABLES=["persons", "households"], + ) + model_settings = {"annotate_tours": annotator_settings} + + # annotating preprocessors + expressions.annotate_tables( + state, + model_settings=model_settings, + trace_label="ci_test_annotator", + skims=setup_skims(state), + locals_dict={"test_constant": 42}, + ) + + # outputs now put directly into the state object + check_outputs(state.get_table("tours")) + + # test what happens if we try to annotate a table that does not exist + model_settings = {"annotate_trips": annotator_settings} + + with pytest.raises(ValueError) as excinfo: + # this should raise an error because "trips" table does not exist in state + expressions.annotate_tables( + state, + model_settings=model_settings, + trace_label="ci_test_annotator", + skims=None, + locals_dict={"test_constant": 42}, + ) From c88481ebd7e47c43a3c775d664426851ce65ba47 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Tue, 1 Jul 2025 10:24:45 -0700 Subject: [PATCH 9/9] addressing review comments --- activitysim/abm/models/cdap.py | 3 ++- activitysim/abm/models/mandatory_tour_frequency.py | 6 ++---- activitysim/abm/models/vehicle_allocation.py | 5 ++--- activitysim/core/configuration/logit.py | 14 +++++++++----- activitysim/core/util.py | 6 +++--- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 2ade0b3454..f8632da09f 100644 --- 
a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -43,8 +43,9 @@ class CdapSettings(PydanticReadable, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Preprocess choosers tables before running the model.""" annotate_persons: PreprocessorSettings | None = None + """Postprocess persons table after model completion.""" annotate_households: PreprocessorSettings | None = None - """Postprocess tables after model completion.""" + """Postprocess households table after model completion.""" @workflow.step diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py index f495f1c2b2..41a9806748 100644 --- a/activitysim/abm/models/mandatory_tour_frequency.py +++ b/activitysim/abm/models/mandatory_tour_frequency.py @@ -58,10 +58,8 @@ class MandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): Settings for the `mandatory_tour_frequency` component. """ - preprocessor: PreprocessorSettings | None = None - """Setting for the preprocessor.""" - - annotate_persons: PreprocessorSettings | None = None + # no additional fields are required for this component + pass @workflow.step diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index d4efca1102..a3f04037c0 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -90,13 +90,12 @@ class VehicleAllocationSettings(LogitComponentSettings, extra="forbid"): Settings for the `vehicle_allocation` component. """ - OCCUPANCY_LEVELS: list = [1, 2, 3.5] + OCCUPANCY_LEVELS: list = [1] # TODO check this """Occupancy level It will create columns in the tour table selecting a vehicle for each of the occupancy levels. They are named vehicle_occup_1, vehicle_occup_2,... etc. - if not supplied, will default to only one occupancy level of 1, 2, and 3.5 - representing sov, hov2, and hov3+ respectively. 
+ if not supplied, will default to only one occupancy level of 1 """ diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index 452d645970..7688ac1bb3 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -87,16 +87,20 @@ class BaseLogitComponentSettings(PydanticCompute): """ annotate_households: PreprocessorSettings | None = None - annotate_persons: PreprocessorSettings | None = None - annotate_tours: PreprocessorSettings | None = None - annotate_trips: PreprocessorSettings | None = None - annotate_vehicles: PreprocessorSettings | None = None - """Annotate output tables with additional columns. + """Annotate households output tables with additional columns. These settings are used to add additional columns to the output tables after the logit model is run. They are typically used to add additional attributes that are derived from the model results. """ + annotate_persons: PreprocessorSettings | None = None + """Annotate persons output tables with additional columns.""" + annotate_tours: PreprocessorSettings | None = None + """Annotate tours output tables with additional columns.""" + annotate_trips: PreprocessorSettings | None = None + """Annotate trips output tables with additional columns.""" + annotate_vehicles: PreprocessorSettings | None = None + """Annotate vehicles output tables with additional columns.""" # sharrow_skip is deprecated in factor of compute_settings.sharrow_skip @model_validator(mode="before") diff --git a/activitysim/core/util.py b/activitysim/core/util.py index cbb6f2da2c..0db1e4dde5 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -704,9 +704,9 @@ def drop_unused_columns( custom_chooser_lines = inspect.getsource(custom_chooser) unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines)) - logger.debug("Dropping unused variables in chooser table") + logger.info("Dropping unused variables in chooser table") - 
logger.debug( + logger.info( "before dropping, the choosers table has {} columns: {}".format( len(choosers.columns), choosers.columns ) @@ -715,7 +715,7 @@ def drop_unused_columns( # keep only variables needed for spec choosers = choosers[[c for c in choosers.columns if c in unique_variables_in_spec]] - logger.debug( + logger.info( "after dropping, the choosers table has {} columns: {}".format( len(choosers.columns), choosers.columns )