From a14cef3125946badde8c0474b8ccaa22e0d9ff9d Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 21 Oct 2025 14:04:30 -0700 Subject: [PATCH 1/9] Implemented hacky attempt at getting subprocesses to have the same run_id as the whole model run --- activitysim/core/workflow/state.py | 10 ++++++++-- activitysim/core/workflow/tracing.py | 14 +++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index 6178f2488..364d2dc37 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -110,7 +110,7 @@ class State: The encapsulated state of an ActivitySim model. """ - def __init__(self, context=None): + def __init__(self, context=None, run_id = None): """ Initialize the encapsulated state of an ActivitySim model. @@ -130,6 +130,12 @@ def __init__(self, context=None): self._context = context else: raise TypeError(f"cannot init {type(self)} with {type(context)}") + + self.run_id = run_id + + @property + def run_id(self): + return self.run_id def __del__(self): self.close_open_files() @@ -253,7 +259,7 @@ def import_extensions(self, ext: str | Iterable[str] = None, append=True) -> Non checkpoint = Checkpoints() logging = Logging() - tracing = Tracing() + tracing = Tracing(run_id) extend = Extend() report = Reporting() dataset = Datasets() diff --git a/activitysim/core/workflow/tracing.py b/activitysim/core/workflow/tracing.py index 580c6fad9..c0dd0dce5 100644 --- a/activitysim/core/workflow/tracing.py +++ b/activitysim/core/workflow/tracing.py @@ -50,10 +50,22 @@ class Tracing(StateAccessor): Methods to provide the tracing capabilities of ActivitySim. """ + def __init__(self, run_id = None): + super().__init__() + if run_id is None: + run_id = RunId() + self.run_id = run_id + traceable_tables: list[str] = FromState(default_value=DEFAULT_TRACEABLE_TABLES) traceable_table_ids: dict[str, Sequence] = FromState(default_init=True) traceable_table_indexes: dict[str, str] = FromState(default_init=True) - run_id: RunId = FromState(default_init=True) + + @property + def run_id(self) -> RunId: + if self._obj is None: + return RunId() + else: + return self.run_id @property def validation_directory(self) -> Path | None: From e06eef58e78fff31e2a3c2120a572a8fb1683574 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 21 Oct 2025 15:36:13 -0700 Subject: [PATCH 2/9] Reverted previous commit --- activitysim/core/workflow/state.py | 10 ++-------- activitysim/core/workflow/tracing.py | 14 +------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index 364d2dc37..6178f2488 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -110,7 +110,7 @@ class State: The encapsulated state of an ActivitySim model. """ - def __init__(self, context=None, run_id = None): + def __init__(self, context=None): """ Initialize the encapsulated state of an ActivitySim model. @@ -130,12 +130,6 @@ def __init__(self, context=None, run_id = None): self._context = context else: raise TypeError(f"cannot init {type(self)} with {type(context)}") - - self.run_id = run_id - - @property - def run_id(self): - return self.run_id def __del__(self): self.close_open_files() @@ -259,7 +253,7 @@ def import_extensions(self, ext: str | Iterable[str] = None, append=True) -> Non checkpoint = Checkpoints() logging = Logging() - tracing = Tracing(run_id) + tracing = Tracing() extend = Extend() report = Reporting() dataset = Datasets() diff --git a/activitysim/core/workflow/tracing.py b/activitysim/core/workflow/tracing.py index c0dd0dce5..580c6fad9 100644 --- a/activitysim/core/workflow/tracing.py +++ b/activitysim/core/workflow/tracing.py @@ -50,22 +50,10 @@ class Tracing(StateAccessor): Methods to provide the tracing capabilities of ActivitySim. """ - def __init__(self, run_id = None): - super().__init__() - if run_id is None: - run_id = RunId() - self.run_id = run_id - traceable_tables: list[str] = FromState(default_value=DEFAULT_TRACEABLE_TABLES) traceable_table_ids: dict[str, Sequence] = FromState(default_init=True) traceable_table_indexes: dict[str, str] = FromState(default_init=True) - - @property - def run_id(self) -> RunId: - if self._obj is None: - return RunId() - else: - return self.run_id + run_id: RunId = FromState(default_init=True) @property def validation_directory(self) -> Path | None: From acbc11fedd29cc59c024e2199d21b667c0a9e411 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 28 Oct 2025 16:02:53 -0700 Subject: [PATCH 3/9] Moved generation of Run ID to cli/run.py, initialized states with None for RunID, and set the RunID after the state is initialized --- activitysim/cli/run.py | 14 ++++++++++++++ activitysim/core/configuration/top.py | 10 ++++++++++ activitysim/core/mp_tasks.py | 1 + activitysim/core/workflow/state.py | 1 + activitysim/core/workflow/tracing.py | 22 +++++----------------- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index af9a76daa..258ba8cd5 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -9,6 +9,8 @@ import sys import warnings from datetime import datetime +import struct +import time import numpy as np @@ -29,8 +31,17 @@ "settings_file_name", "imported_extensions", "run_timestamp", + "run_id", ] +class RunId(str): + def __new__(cls, x=None): + if x is None: + return cls( + hex(struct.unpack(" workflow injects injectables """ state = workflow.State() + state.run_id = injectables.get("run_id", None) state = state.initialize_filesystem(**injectables) state.settings = injectables.get("settings", Settings()) state.filesystem.parse_settings(state.settings) diff --git a/activitysim/core/workflow/state.py b/activitysim/core/workflow/state.py index 6178f2488..792b4c15d 100644 --- a/activitysim/core/workflow/state.py +++ b/activitysim/core/workflow/state.py @@ -258,6 +258,7 @@ def import_extensions(self, ext: str | Iterable[str] = None, append=True) -> Non report = Reporting() dataset = Datasets() chunk = Chunking() + run_id = None # To be initialized when the run starts @property def this_step(self): diff --git a/activitysim/core/workflow/tracing.py b/activitysim/core/workflow/tracing.py index 580c6fad9..6063a214c 100644 --- a/activitysim/core/workflow/tracing.py +++ b/activitysim/core/workflow/tracing.py @@ -5,11 +5,9 @@ import logging import logging.config import os -import struct import sys import tarfile import tempfile -import time from collections.abc import Mapping, MutableMapping, Sequence from pathlib import Path from typing import Any, Optional @@ -35,16 +33,6 @@ "vehicles", ] - -class RunId(str): - def __new__(cls, x=None): - if x is None: - return cls( - hex(struct.unpack(" Path | None: @@ -252,7 +240,7 @@ def write_csv( file_name = "%s.%s" % (file_name, CSV_FILE_TYPE) file_path = self._obj.filesystem.get_trace_file_path( - file_name, tail=self.run_id + file_name, tail=self._obj.run_id ) if os.name == "nt": @@ -378,7 +366,7 @@ def read_csv_as_list_of_lists(finame): that_blob = read_csv_as_list_of_lists(that_path) this_path = self._obj.filesystem.get_trace_file_path( - label, tail=self.run_id, file_type="csv" + label, tail=self._obj.run_id, file_type="csv" ) this_blob = read_csv_as_list_of_lists(this_path) @@ -410,7 +398,7 @@ def read_csv_as_list_of_lists(finame): that_df = pd.read_csv(that_path) # check against the file we just wrote this_path = self._obj.filesystem.get_trace_file_path( - label, tail=self.run_id, file_type="csv" + label, tail=self._obj.run_id, file_type="csv" ) this_df = pd.read_csv(this_path) assert_frame_substantively_equal(this_df, that_df) @@ -452,7 +440,7 @@ def trace_interaction_eval_results(self, trace_results, trace_ids, label): # write out the raw dataframe file_path = self._obj.filesystem.get_trace_file_path( - "%s.raw.csv" % label, tail=self.run_id + "%s.raw.csv" % label, tail=self._obj.run_id ) trace_results.to_csv(file_path, mode="a", index=True, header=True) From d5178014f66e4864be9d7cfd344b496320c19d60 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Thu, 13 Nov 2025 11:03:15 -0600 Subject: [PATCH 4/9] pass run_id to subprocesses --- activitysim/cli/run.py | 13 +++---------- activitysim/core/configuration/top.py | 7 ------- activitysim/core/mp_tasks.py | 6 +++++- activitysim/core/run_id.py | 10 ++++++++++ activitysim/core/workflow/state.py | 1 - activitysim/core/workflow/tracing.py | 11 ++++++----- 6 files changed, 24 insertions(+), 24 deletions(-) create mode 100644 activitysim/core/run_id.py diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index 258ba8cd5..a1b23048c 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -16,6 +16,7 @@ from activitysim.core import chunk, config, mem, timing, tracing, workflow from activitysim.core.configuration import FileSystem, Settings +from activitysim.core.run_id import RunId from activitysim.abm.models.settings_checker import check_model_settings @@ -34,14 +35,6 @@ "run_id", ] -class RunId(str): - def __new__(cls, x=None): - if x is None: - return cls( - hex(struct.unpack(" workflow injects injectables """ state = workflow.State() - state.run_id = injectables.get("run_id", None) + _run_id = injectables.get("run_id", None) + if _run_id: + state.tracing.run_id = RunId(_run_id) + state = state.initialize_filesystem(**injectables) state.settings = injectables.get("settings", Settings()) state.filesystem.parse_settings(state.settings) diff --git a/activitysim/core/run_id.py b/activitysim/core/run_id.py new file mode 100644 index 000000000..ed1f9c92c --- /dev/null +++ b/activitysim/core/run_id.py @@ -0,0 +1,10 @@ +import struct +import time + +class RunId(str): + def __new__(cls, x=None): + if x is None: + return cls( + hex(struct.unpack(" Non report = Reporting() dataset = Datasets() chunk = Chunking() - run_id = None # To be initialized when the run starts @property def this_step(self): diff --git a/activitysim/core/workflow/tracing.py b/activitysim/core/workflow/tracing.py index 45ba53d19..cb7affb9e 100644 --- a/activitysim/core/workflow/tracing.py +++ b/activitysim/core/workflow/tracing.py @@ -20,6 +20,7 @@ from activitysim.core.test import assert_equal, assert_frame_substantively_equal from activitysim.core.workflow.accessor import FromState, StateAccessor from activitysim.core.exceptions import TableSlicingError +from activitysim.core.run_id import RunId logger = logging.getLogger(__name__) @@ -42,7 +43,7 @@ class Tracing(StateAccessor): traceable_tables: list[str] = FromState(default_value=DEFAULT_TRACEABLE_TABLES) traceable_table_ids: dict[str, Sequence] = FromState(default_init=True) traceable_table_indexes: dict[str, str] = FromState(default_init=True) - # run_id: RunId = FromState(default_init=True) + run_id: RunId = FromState(default_init=True) @property def validation_directory(self) -> Path | None: @@ -241,7 +242,7 @@ def write_csv( file_name = "%s.%s" % (file_name, CSV_FILE_TYPE) file_path = self._obj.filesystem.get_trace_file_path( - file_name, tail=self._obj.run_id + file_name, tail=self.run_id ) if os.name == "nt": @@ -367,7 +368,7 @@ def read_csv_as_list_of_lists(finame): that_blob = read_csv_as_list_of_lists(that_path) this_path = self._obj.filesystem.get_trace_file_path( - label, tail=self._obj.run_id, file_type="csv" + label, tail=self.run_id, file_type="csv" ) this_blob = read_csv_as_list_of_lists(this_path) @@ -399,7 +400,7 @@ def read_csv_as_list_of_lists(finame): that_df = pd.read_csv(that_path) # check against the file we just wrote this_path = self._obj.filesystem.get_trace_file_path( - label, tail=self._obj.run_id, file_type="csv" + label, tail=self.run_id, file_type="csv" ) this_df = pd.read_csv(this_path) assert_frame_substantively_equal(this_df, that_df) @@ -441,7 +442,7 @@ def trace_interaction_eval_results(self, trace_results, trace_ids, label): # write out the raw dataframe file_path = self._obj.filesystem.get_trace_file_path( - "%s.raw.csv" % label, tail=self._obj.run_id + "%s.raw.csv" % label, tail=self.run_id ) trace_results.to_csv(file_path, mode="a", index=True, header=True) From 3aa038adcf58aa5628f30c31a40da7fc0bcb04f6 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 13 Nov 2025 15:28:56 -0800 Subject: [PATCH 5/9] Added CI testing for trace ID hashes --- test/trace_id/.gitignore | 2 + test/trace_id/simulation.py | 16 +++++++ test/trace_id/test_trace_id.py | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 test/trace_id/.gitignore create mode 100644 test/trace_id/simulation.py create mode 100644 test/trace_id/test_trace_id.py diff --git a/test/trace_id/.gitignore b/test/trace_id/.gitignore new file mode 100644 index 000000000..67176c62d --- /dev/null +++ b/test/trace_id/.gitignore @@ -0,0 +1,2 @@ +configs*/ +output/ \ No newline at end of file diff --git a/test/trace_id/simulation.py b/test/trace_id/simulation.py new file mode 100644 index 000000000..70cf3457f --- /dev/null +++ b/test/trace_id/simulation.py @@ -0,0 +1,16 @@ +# ActivitySim +# See full license in LICENSE.txt. + +from __future__ import annotations + +import argparse +import sys + +from activitysim.cli.run import add_run_args, run + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + add_run_args(parser) + args = parser.parse_args() + + sys.exit(run(args)) diff --git a/test/trace_id/test_trace_id.py b/test/trace_id/test_trace_id.py new file mode 100644 index 000000000..7bf2e7ced --- /dev/null +++ b/test/trace_id/test_trace_id.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +# ActivitySim +# See full license in LICENSE.txt. +import importlib.resources +import os +import subprocess +from shutil import copytree + +import pandas as pd +import pandas.testing as pdt +import yaml + +def update_settings(settings_file, key, value): + with open(settings_file, "r") as f: + settings = yaml.safe_load(f) + f.close() + + settings[key] = value + + with open(settings_file, "w") as f: + yaml.safe_dump(settings, f) + f.close() + +def run_test_trace_id(): + + def example_path(dirname): + resource = os.path.join("examples", "prototype_mtc", dirname) + return str(importlib.resources.files("activitysim").joinpath(resource)) + + def test_path(dirname): + return os.path.join(os.path.dirname(__file__), dirname) + + new_configs_dir = test_path("configs") + new_mp_configs_dir = test_path("configs_mp") + new_settings_file = os.path.join(new_configs_dir, "settings.yaml") + copytree(example_path("configs"), new_configs_dir) + copytree(example_path("configs_mp"), new_mp_configs_dir) + + update_settings(new_settings_file, "trace_hh_id", 1932009) # Household in the prototype_mtc example with 11 people + + def check_csv_suffix(directory): + suffix = None + mismatched_files = [] + for root, dirs, files in os.walk(directory): + for filename in files: + if filename.lower().endswith('.csv'): + file_suffix = filename[-10:] + if suffix is None: + suffix = file_suffix + elif file_suffix != suffix: + mismatched_files.append(os.path.join(root, filename)) + if mismatched_files: + raise AssertionError(f"CSV files with mismatched suffixes: {mismatched_files}") + + file_path = os.path.join(os.path.dirname(__file__), "simulation.py") + + run_args = [ + "-c", + test_path("configs_mp"), + "-c", + test_path("configs"), + "-d", + example_path("data"), + "-o", + test_path("output"), + ] + + try: + os.mkdir(test_path("output")) + except FileExistsError: + pass + + subprocess.run(["coverage", "run", "-a", file_path] + run_args, check=True) + + check_csv_suffix(os.path.join(test_path("output"), "trace")) + +if __name__ == "__main__": + run_test_trace_id() \ No newline at end of file From 251ccb7e5706f2d8605f7b4f9616f0af2318768e Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Thu, 13 Nov 2025 15:30:50 -0800 Subject: [PATCH 6/9] Added test_trace_id.py to list of CI tests to run --- .github/workflows/core_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 8e659b25c..3e4fd52f9 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -130,6 +130,7 @@ jobs: - run: uv run pytest test/test_skim_name_conflicts.py - run: uv run pytest test/random_seed/test_random_seed.py + - run: uv run pytest test/trace_id/test_trace_id.py builtin_regional_models: needs: foundation From 8f0c4a83618d931bb1810359c7d74d32e7751456 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 18 Nov 2025 13:49:57 -0800 Subject: [PATCH 7/9] Renamed trace ID test so that it will actually run --- test/trace_id/test_trace_id.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/trace_id/test_trace_id.py b/test/trace_id/test_trace_id.py index 7bf2e7ced..07d4ad5a5 100644 --- a/test/trace_id/test_trace_id.py +++ b/test/trace_id/test_trace_id.py @@ -22,7 +22,7 @@ def update_settings(settings_file, key, value): yaml.safe_dump(settings, f) f.close() -def run_test_trace_id(): +def test_trace_ids_have_same_hash(): def example_path(dirname): resource = os.path.join("examples", "prototype_mtc", dirname) @@ -76,4 +76,4 @@ def check_csv_suffix(directory): check_csv_suffix(os.path.join(test_path("output"), "trace")) if __name__ == "__main__": - run_test_trace_id() \ No newline at end of file + test_trace_ids_have_same_hash() \ No newline at end of file From be2db24606ef79de7940023c83ff75f5628a9432 Mon Sep 17 00:00:00 2001 From: JoeJimFlood Date: Tue, 18 Nov 2025 15:07:48 -0800 Subject: [PATCH 8/9] blacken --- activitysim/core/configuration/top.py | 1 - activitysim/core/run_id.py | 3 ++- activitysim/core/workflow/tracing.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 66a385906..a6c29269a 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -790,4 +790,3 @@ def _get_attr(self, attr): return getattr(self, attr) except AttributeError: return self.other_settings.get(attr) - diff --git a/activitysim/core/run_id.py b/activitysim/core/run_id.py index ed1f9c92c..a5d4ea1c8 100644 --- a/activitysim/core/run_id.py +++ b/activitysim/core/run_id.py @@ -1,10 +1,11 @@ import struct import time + class RunId(str): def __new__(cls, x=None): if x is None: return cls( hex(struct.unpack(" Date: Tue, 18 Nov 2025 15:25:20 -0800 Subject: [PATCH 9/9] blacken 'test' directory --- test/trace_id/test_trace_id.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/test/trace_id/test_trace_id.py b/test/trace_id/test_trace_id.py index 07d4ad5a5..4d678759e 100644 --- a/test/trace_id/test_trace_id.py +++ b/test/trace_id/test_trace_id.py @@ -11,6 +11,7 @@ import pandas.testing as pdt import yaml + def update_settings(settings_file, key, value): with open(settings_file, "r") as f: settings = yaml.safe_load(f) @@ -22,37 +23,41 @@ def update_settings(settings_file, key, value): yaml.safe_dump(settings, f) f.close() -def test_trace_ids_have_same_hash(): +def test_trace_ids_have_same_hash(): def example_path(dirname): resource = os.path.join("examples", "prototype_mtc", dirname) return str(importlib.resources.files("activitysim").joinpath(resource)) def test_path(dirname): return os.path.join(os.path.dirname(__file__), dirname) - + new_configs_dir = test_path("configs") new_mp_configs_dir = test_path("configs_mp") new_settings_file = os.path.join(new_configs_dir, "settings.yaml") copytree(example_path("configs"), new_configs_dir) copytree(example_path("configs_mp"), new_mp_configs_dir) - update_settings(new_settings_file, "trace_hh_id", 1932009) # Household in the prototype_mtc example with 11 people + update_settings( + new_settings_file, "trace_hh_id", 1932009 + ) # Household in the prototype_mtc example with 11 people def check_csv_suffix(directory): suffix = None mismatched_files = [] for root, dirs, files in os.walk(directory): for filename in files: - if filename.lower().endswith('.csv'): + if filename.lower().endswith(".csv"): file_suffix = filename[-10:] if suffix is None: suffix = file_suffix elif file_suffix != suffix: mismatched_files.append(os.path.join(root, filename)) if mismatched_files: - raise AssertionError(f"CSV files with mismatched suffixes: {mismatched_files}") - + raise AssertionError( + f"CSV files with mismatched suffixes: {mismatched_files}" + ) + file_path = os.path.join(os.path.dirname(__file__), "simulation.py") run_args = [ @@ -65,7 +70,7 @@ def check_csv_suffix(directory): "-o", test_path("output"), ] - + try: os.mkdir(test_path("output")) except FileExistsError: @@ -75,5 +80,6 @@ def check_csv_suffix(directory): check_csv_suffix(os.path.join(test_path("output"), "trace")) + if __name__ == "__main__": - test_trace_ids_have_same_hash() \ No newline at end of file + test_trace_ids_have_same_hash()