From 915e8d793b4d446d0569ff4ba0cc5c625528e810 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 23 Apr 2024 11:08:23 -0500 Subject: [PATCH 01/37] test external regional model examples --- .github/workflows/core_tests.yml | 78 ++++++++++++++++---------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 1afe9270a..2ec0a91d2 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -24,10 +24,10 @@ jobs: name: linux-64-py${{ matrix.python-version }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -38,7 +38,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: /usr/share/miniconda3/envs/asim-test key: linux-64-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -124,10 +124,10 @@ jobs: name: ${{ matrix.label }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -138,7 +138,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ matrix.prefix }} key: ${{ matrix.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -221,10 +221,10 @@ jobs: name: ${{ matrix.region }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -235,7 +235,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.mamba-env-prefix }} key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -306,9 +306,15 @@ jobs: label: linux-64 strategy: matrix: - region: - - prototype_mtc - - prototype_psrc_in_development + include: + - region: Standard 1-Zone Example (MTC) + region-org: ActivitySim + region-repo: activitysim-prototype-mtc + region-branch: extended + - region: Standard 2-Zone Example (SANDAG) + region-org: ActivitySim + region-repo: sandag-abm3-example + region-branch: main fail-fast: false defaults: run: @@ -316,10 +322,11 @@ jobs: name: ${{ matrix.region }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Checkout ActivitySim + uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -330,7 +337,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: | ${{ env.mamba-env-prefix }} @@ -341,19 +348,6 @@ jobs: - name: Update environment run: | mamba 
env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -361,16 +355,24 @@ jobs: # are in the conda environment defined above. Also, this avoids pip getting # confused and reinstalling tables (pytables). run: | - python -m pip install -e . --no-deps + python -m pip install . --no-deps - name: Conda checkup run: | mamba info -a mamba list + - name: Checkout Example + uses: actions/checkout@v4 + with: + repository: '${{ matrix.region-org }}/${{ matrix.region-repo }}' + ref: '${{ matrix.region-branch }}' + path: '${{ matrix.region-repo }}' + - name: Test ${{ matrix.region }} run: | - python -m activitysim test ${{ matrix.region }} + cd ${{ matrix.region-repo }}/test + python -m pytest . random_seed_generation: needs: foundation @@ -385,10 +387,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -399,7 +401,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.mamba-env-prefix }} key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -451,10 +453,10 @@ jobs: name: estimation_mode_test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -465,7 +467,7 @@ jobs: - name: Set cache date for year and month run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.mamba-env-prefix }} key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} @@ -517,7 +519,7 @@ jobs: run: shell: bash -l {0} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 # get all tags, lets setuptools_scm do its thing - name: Set up Python 3.10 @@ -525,7 +527,7 @@ jobs: with: python-version: "3.10" - name: Install dependencies - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest From 30382566270ee21e4dcac160173eb118ee2e8de3 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 23 Apr 2024 11:15:27 -0500 Subject: [PATCH 02/37] cache buster --- .github/workflows/core_tests.yml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 2ec0a91d2..31888df53 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -10,7 +10,7 @@ on: - '*' env: - CACHE_NUMBER: 0 # increase to reset cache manually + CACHE_NUMBER: 1 # increase to reset cache manually jobs: foundation: @@ -47,19 +47,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test 
-f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.6.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim From 3bdc1a4c7a1edf4fb2cffca0159e5ff79a0b84e8 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 24 Apr 2024 08:37:21 -0500 Subject: [PATCH 03/37] optional variable doc --- docs/dev-guide/using-sharrow.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md index e2b0093d4..3ab0d47a2 100644 --- a/docs/dev-guide/using-sharrow.md +++ b/docs/dev-guide/using-sharrow.md @@ -193,13 +193,23 @@ in several examples: `@np.log1p(size_terms.get(df.alt_dest, df.purpose)) # sharrow: np.log1p(size_terms['sizearray'])` Here, `size_terms` is a DataFrameMatrix class instance, a special class written into -ActivitySim to facilitate reading from a DataFrame as it it were a 2-d array. As it +ActivitySim to facilitate reading from a DataFrame as if it were a 2-d array. As it is a special purpose class written in Python, the numba compiler cannot handle it directly. Fortunately, sharrow provides an alternative: passing the size terms as a xarray `DataArray`. This has a slightly different interface, so the sharrow and legacy evaluation modes require different expressions. The switching expression is used to handle the DataFrameMatrix on the left (before "# sharrow:") and the DataArray on the right. +### Optional Variables + +In some cases, a variable may be used where it is available, but is not strictly +necessary for the model to run. For example, a model may have a reference to +mode choice logsums, but the model can still run without them, if it is being used +prior to when logsums are calculated. In this case, the variable can be accessed +using the `get` method, which allows for a default value if the variable is not found. + + `@df.get('mode_choice_logsum', 0)` + ### Performance Considerations Sharrow is usually expected to bring significant performance gains to ActivitySim. 
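A minimal sketch (not from the patches themselves) of how the `get` pattern documented in PATCH 03 behaves on a plain pandas DataFrame; the toy choosers table and the `mode_choice_logsum` column name are illustrative assumptions:

```python
# Sketch of the optional-variable pattern: `df` stands in for the choosers
# table, and `mode_choice_logsum` is a hypothetical column that may not
# have been computed yet at this point in the model run.
import pandas as pd

df = pd.DataFrame({"income": [55000, 72000]})  # no logsum column yet

# Equivalent of the spec expression `@df.get('mode_choice_logsum', 0)`:
# returns the column when it exists, otherwise the scalar default 0.
value = df.get("mode_choice_logsum", 0)
print(value)  # -> 0, because the column has not been created yet
```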
From b876e9c78e028edf3f302ed6d546bb75932306fb Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 24 Apr 2024 08:55:05 -0500 Subject: [PATCH 04/37] fix conda cache dirs --- .github/workflows/core_tests.yml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 31888df53..1a24d0fb6 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -10,7 +10,7 @@ on: - '*' env: - CACHE_NUMBER: 1 # increase to reset cache manually + CACHE_NUMBER: 2 # increase to reset cache manually jobs: foundation: @@ -96,12 +96,10 @@ jobs: include: - os: macos-latest label: macOS - prefix: /Users/runner/miniconda3/envs/asim-test python-version: "3.10" - os: windows-latest label: win-64 - prefix: C:\Miniconda3\envs\asim-test python-version: "3.10" defaults: @@ -127,7 +125,7 @@ jobs: - uses: actions/cache@v4 with: - path: ${{ matrix.prefix }} + path: ${{ env.CONDA }}/envs key: ${{ matrix.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache @@ -186,7 +184,6 @@ jobs: builtin_regional_models: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 strategy: @@ -224,7 +221,7 @@ jobs: - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache @@ -288,7 +285,6 @@ jobs: external_regional_models: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 strategy: @@ -327,7 +323,7 @@ jobs: - uses: actions/cache@v4 with: path: | - ${{ env.mamba-env-prefix }} + ${{ env.CONDA }}/envs ~/.cache/ActivitySim key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache @@ -364,7 +360,6 @@ jobs: random_seed_generation: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 defaults: @@ -390,7 +385,7 @@ jobs: - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache @@ -431,7 +426,6 @@ jobs: estimation_mode: needs: foundation env: - mamba-env-prefix: /usr/share/miniconda3/envs/asim-test python-version: "3.10" label: linux-64 defaults: @@ -456,7 +450,7 @@ jobs: - uses: actions/cache@v4 with: - path: ${{ env.mamba-env-prefix }} + path: ${{ env.CONDA }}/envs key: ${{ env.label }}-conda-${{ hashFiles('conda-environments/github-actions-tests.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} id: cache From 6438f593802db999a70d21e842f90486ffd0e6c5 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 2 May 2024 14:48:36 -0700 Subject: [PATCH 05/37] trip_destination alts preprocessor --- activitysim/abm/models/trip_destination.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 9789f805e..6b6b11e85 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -60,6 +60,7 @@ class TripDestinationSettings(LocationComponentSettings, extra="forbid"): PRIMARY_DEST: str = 
"tour_leg_dest" # must be created in preprocessor REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None preprocessor: PreprocessorSettings | None = None + alt_preprocessor: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" @@ -1245,6 +1246,7 @@ def run_trip_destination( state.filesystem, model_settings_file_name ) preprocessor_settings = model_settings.preprocessor + alt_preprocessor_settings = model_settings.alt_preprocessor logsum_settings = state.filesystem.read_model_settings( model_settings.LOGSUM_SETTINGS ) @@ -1369,6 +1371,15 @@ def run_trip_destination( trace_label=nth_trace_label, ) + if alt_preprocessor_settings: + expressions.assign_columns( + state, + df=alternatives, + model_settings=alt_preprocessor_settings, + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(nth_trace_label, "alts"), + ) + if isinstance( nth_trips["trip_period"].dtype, pd.api.types.CategoricalDtype ): From 33097eb767229d44dc283473e736d5f4ece7c421 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Sat, 4 May 2024 13:59:37 -0700 Subject: [PATCH 06/37] non_hh_veh cat, drop unused cols for alts --- activitysim/abm/models/trip_destination.py | 10 ++++------ activitysim/abm/models/vehicle_allocation.py | 4 ++++ activitysim/core/interaction_sample.py | 9 +++++++++ activitysim/core/interaction_sample_simulate.py | 9 +++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 6b6b11e85..8a3320a20 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -60,7 +60,7 @@ class TripDestinationSettings(LocationComponentSettings, extra="forbid"): PRIMARY_DEST: str = "tour_leg_dest" # must be created in preprocessor REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None preprocessor: PreprocessorSettings | None = None - alt_preprocessor: PreprocessorSettings | None = None + alts_preprocessor: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" @@ -1246,7 +1246,7 @@ def run_trip_destination( state.filesystem, model_settings_file_name ) preprocessor_settings = model_settings.preprocessor - alt_preprocessor_settings = model_settings.alt_preprocessor + alts_preprocessor_settings = model_settings.alts_preprocessor logsum_settings = state.filesystem.read_model_settings( model_settings.LOGSUM_SETTINGS ) @@ -1336,8 +1336,6 @@ def run_trip_destination( # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index size_term_matrix = DataFrameMatrix(alternatives) - # don't need size terms in alternatives, just zone_id index - alternatives = alternatives.drop(alternatives.columns, axis=1) alternatives.index.name = model_settings.ALT_DEST_COL_NAME sample_list = [] @@ -1371,11 +1369,11 @@ def run_trip_destination( trace_label=nth_trace_label, ) - if alt_preprocessor_settings: + if alts_preprocessor_settings: expressions.assign_columns( state, df=alternatives, - model_settings=alt_preprocessor_settings, + model_settings=alts_preprocessor_settings, locals_dict=locals_dict, trace_label=tracing.extend_trace_label(nth_trace_label, "alts"), ) diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index 9dcaf8c71..9061e99fb 100644 --- 
a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -259,6 +259,10 @@ def vehicle_allocation( choices.loc[choices["alt_choice"] == alt, "choice"] = choosers.loc[ choices["alt_choice"] == alt, alt ] + # add non-household vehicle option to categories + if alts_from_spec[-1] not in choices["choice"].cat.categories: + choices["choice"] = choices["choice"].cat.add_categories(alts_from_spec[-1]) + # set choice for non-household vehicle option choices.loc[ choices["alt_choice"] == alts_from_spec[-1], "choice" ] = alts_from_spec[-1] diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 5a05b7e8b..b87068150 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -259,6 +259,15 @@ def _interaction_sample( sharrow_enabled=sharrow_enabled, ) + alternatives = util.drop_unused_columns( + alternatives, + spec, + locals_d, + custom_chooser=None, + sharrow_enabled=sharrow_enabled, + additional_columns=['tdd'] + ) + if sharrow_enabled: ( interaction_utilities, diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index 3d729ad49..3621701f8 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -160,6 +160,15 @@ def _interaction_sample_simulate( sharrow_enabled=sharrow_enabled, ) + alternatives = util.drop_unused_columns( + alternatives, + spec, + locals_d, + custom_chooser=None, + sharrow_enabled=sharrow_enabled, + additional_columns=['tdd'] + ) + interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser") logger.info( f"{trace_label} end merging choosers and alternatives to create interaction_df" From a61b2d59c8f29b86937a6e911848d5e7b726c587 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Sat, 4 May 2024 14:02:31 -0700 Subject: [PATCH 07/37] blacken --- activitysim/core/interaction_sample.py | 2 +- activitysim/core/interaction_sample_simulate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index b87068150..6d4b929c5 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -265,7 +265,7 @@ def _interaction_sample( locals_d, custom_chooser=None, sharrow_enabled=sharrow_enabled, - additional_columns=['tdd'] + additional_columns=["tdd"], ) if sharrow_enabled: diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index 3621701f8..8951a3d07 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -166,7 +166,7 @@ def _interaction_sample_simulate( locals_d, custom_chooser=None, sharrow_enabled=sharrow_enabled, - additional_columns=['tdd'] + additional_columns=["tdd"], ) interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser") From 4c4a9d98ef5463e0da8af433baf29db92a62f62f Mon Sep 17 00:00:00 2001 From: David Hensle Date: Sat, 4 May 2024 14:41:13 -0700 Subject: [PATCH 08/37] adding missed alts columns used in xborder model --- activitysim/core/interaction_sample.py | 2 +- activitysim/core/interaction_sample_simulate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 6d4b929c5..6f4c9a6ea 100644 --- a/activitysim/core/interaction_sample.py +++ 
b/activitysim/core/interaction_sample.py @@ -265,7 +265,7 @@ def _interaction_sample( locals_d, custom_chooser=None, sharrow_enabled=sharrow_enabled, - additional_columns=["tdd"], + additional_columns=["tdd", "origin_destination"], ) if sharrow_enabled: diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index 8951a3d07..a317d92af 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -166,7 +166,7 @@ def _interaction_sample_simulate( locals_d, custom_chooser=None, sharrow_enabled=sharrow_enabled, - additional_columns=["tdd"], + additional_columns=["tdd", "origin_destination"], ) interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser") From 6cb139b7675f903770e3235f229c41c4a99951d7 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Mon, 6 May 2024 10:06:44 -0700 Subject: [PATCH 09/37] remove unneeded addition to categorical --- activitysim/abm/models/vehicle_allocation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index 9061e99fb..632839b11 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -259,9 +259,7 @@ def vehicle_allocation( choices.loc[choices["alt_choice"] == alt, "choice"] = choosers.loc[ choices["alt_choice"] == alt, alt ] - # add non-household vehicle option to categories - if alts_from_spec[-1] not in choices["choice"].cat.categories: - choices["choice"] = choices["choice"].cat.add_categories(alts_from_spec[-1]) + # set choice for non-household vehicle option choices.loc[ choices["alt_choice"] == alts_from_spec[-1], "choice" From 0fa02693de880b014e982b552780f09e0344533a Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Thu, 2 May 2024 09:44:35 -0500 Subject: [PATCH 10/37] clearer time logging --- activitysim/core/interaction_simulate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 01ef86dad..78de34dbd 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -174,7 +174,7 @@ def replace_in_index_level(mi, level, *repls): for i1, i2 in zip(exprs, labels): logger.debug(f" - expr: {i1}: {i2}") - timelogger.mark("sharrow preamble", True, logger, trace_label) + timelogger.mark("sharrow interact preamble", True, logger, trace_label) sh_util, sh_flow, sh_tree = apply_flow( state, @@ -197,10 +197,10 @@ def replace_in_index_level(mi, level, *repls): # if not testing sharrow, we are done with this object now. 
del sh_util

-        timelogger.mark("sharrow flow", True, logger, trace_label)
+        timelogger.mark("sharrow interact flow", True, logger, trace_label)
     else:
         sh_util, sh_flow, sh_tree = None, None, None
-        timelogger.mark("sharrow flow", False)
+        timelogger.mark("sharrow interact flow", False)

     if (
         utilities is None

From 35a57d49e1d7682ce75c2e7944aa7dba9caec2ba Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 6 May 2024 11:49:40 -0500
Subject: [PATCH 11/37] bump required numba to 0.57 for np.nan_to_num

---
 conda-environments/docbuild.yml | 2 +-
 pyproject.toml                  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml
index f89e1ac1c..717e023a1 100644
--- a/conda-environments/docbuild.yml
+++ b/conda-environments/docbuild.yml
@@ -27,7 +27,7 @@ dependencies:
 - matplotlib
 - myst-nb
 - myst-parser
-- numba >= 0.56.4
+- numba >= 0.57
 - numpy >= 1.16.1
 - numpydoc
 - openmatrix >= 0.3.4.1
diff --git a/pyproject.toml b/pyproject.toml
index 92e8d85f7..36d4e2146 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ name = "activitysim"
 dynamic = ["version"]
 dependencies = [
     "cytoolz >= 0.8.1",
-    "numba >= 0.55.2",
+    "numba >= 0.57",
     "numpy >= 1.16.1",
     "openmatrix >= 0.3.4.1",
     "orca >= 1.6",

From 5ea73628a2305ffdd8a7e14b441846fd6e6d5fcd Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 6 May 2024 11:50:03 -0500
Subject: [PATCH 12/37] sharrow docs

---
 docs/dev-guide/using-sharrow.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md
index e2b0093d4..daf698500 100644
--- a/docs/dev-guide/using-sharrow.md
+++ b/docs/dev-guide/using-sharrow.md
@@ -174,6 +174,16 @@ memory, as the variable is computed and stored for every row in the entire dataf
 before it can be used in other expressions. In sharrow, temporary variables are
 allocated, used, and freed for each row separately, so no extra memory is required.

+### Pandas-only Expressions
+
+In legacy mode, evaluated expressions can tap into the
+full pandas library, including the ability to call pandas functions and methods
+directly. This is not possible in sharrow, as the expressions are compiled into
+numba code, which does not have access to the pandas library. If a pandas function
+is needed, it must be called in a pre-processor. However, many pandas functions
+can be replaced with numpy functions, which are available in numba. For example,
+`df.income.fillna(0)` can be replaced with `np.nan_to_num(df.income)`.
+
 ### Switchable Expressions

 As a general rule, it is best to write each utility expression in a manner that
@@ -222,6 +232,17 @@ compute_settings:

 in the component's configuration yaml file.

+In addition, by default sharrow also tries to optimize performance by setting the
+`fastmath` flag to True in the numba compiler. This makes the compiler generate
+faster code by assuming that all variables have finite values (not NaN or Inf). 
+If the model has expressions that use variables that can contain NaN or Inf
+values, the `fastmath` flag can be disabled by setting
+
+```yaml
+compute_settings:
+  fastmath: false
+```
+
 ### Multiprocessing Performance

 Sharrow leverages a number of performance enhancing techniques, including

From ac2468ff2913dc9c6d1b2a7caccc04a4a197328f Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 6 May 2024 11:50:37 -0500
Subject: [PATCH 13/37] use compute_setting in sharrow debugging

---
 activitysim/core/interaction_simulate.py | 101 +++++++++++++----------
 1 file changed, 57 insertions(+), 44 deletions(-)

diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py
index 78de34dbd..a97a806ed 100644
--- a/activitysim/core/interaction_simulate.py
+++ b/activitysim/core/interaction_simulate.py
@@ -541,53 +541,66 @@ def to_series(x):
             retrace_eval_parts = {}

             re_trace_df = df.iloc[re_trace]
-            for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]):
-                if expr.startswith("_"):
-                    target = expr[: expr.index("@")]
-                    rhs = expr[expr.index("@") + 1 :]
-                    v = to_series(eval(rhs, globals(), locals_d))
-                    locals_d[target] = v
-                    if trace_eval_results is not None:
-                        trace_eval_results[expr] = v.iloc[re_trace]
-                    continue
-                if expr.startswith("@"):
-                    v = to_series(eval(expr[1:], globals(), locals_d))
-                else:
-                    v = df.eval(expr, resolvers=[locals_d])
-                if check_for_variability and v.std() == 0:
-                    logger.info(
-                        "%s: no variability (%s) in: %s"
-                        % (trace_label, v.iloc[0], expr)
+            with compute_settings.pandas_option_context():
+                for expr, label, coefficient in zip(
+                    exprs, labels, spec.iloc[:, 0]
+                ):
+                    if expr.startswith("_"):
+                        target = expr[: expr.index("@")]
+                        rhs = expr[expr.index("@") + 1 :]
+                        v = to_series(eval(rhs, globals(), locals_d))
+                        locals_d[target] = v
+                        if trace_eval_results is not None:
+                            trace_eval_results[expr] = v.iloc[re_trace]
+                        continue
+                    if expr.startswith("@"):
+                        v = to_series(eval(expr[1:], globals(), locals_d))
+                    else:
+                        v = df.eval(expr, resolvers=[locals_d])
+                    if check_for_variability and v.std() == 0:
+                        logger.info(
+                            "%s: no variability (%s) in: %s"
+                            % (trace_label, v.iloc[0], expr)
+                        )
+                        no_variability += 1
+                    retrace_eval_data[expr] = v.iloc[re_trace]
+                    k = "partial utility (coefficient = %s) for %s" % (
+                        coefficient,
+                        expr,
+                    )
+                    retrace_eval_parts[k] = (
+                        v.iloc[re_trace] * coefficient
+                    ).astype("float")
+                retrace_eval_data_ = pd.concat(retrace_eval_data, axis=1)
+                retrace_eval_parts_ = pd.concat(retrace_eval_parts, axis=1)
+
+                re_sh_flow_load = sh_flow.load(sh_tree, dtype=np.float32)
+                re_sh_flow_load_ = re_sh_flow_load[re_trace]
+
+                use_bottleneck = pd.get_option("compute.use_bottleneck")
+                use_numexpr = pd.get_option("compute.use_numexpr")
+                use_numba = pd.get_option("compute.use_numba")
+
+                look_for_problems_here = np.where(
+                    ~np.isclose(
+                        re_sh_flow_load_[
+                            :,
+                            ~spec.index.get_level_values(0).str.startswith("_"),
+                        ],
+                        retrace_eval_data_.values.astype(np.float32),
+                    )
+                )
+                if len(look_for_problems_here) == 2:
+                    # the first index is the row index, which is probably many different rows
+                    # the second is the column index, hopefully only a few unique values
+                    problem_col_indexes = np.unique(look_for_problems_here[1])
+                    problem_cols = list(
+                        retrace_eval_data_.columns[problem_col_indexes]
+                    )
+                    print("problem expressions:\n", "\n".join(problem_cols))
+
+                raise  # enter debugger now to see what's up

             timelogger.mark("sharrow interact test", True, logger, trace_label)

         logger.info(f"utilities.dtypes {trace_label}\n{utilities.dtypes}")

From 97ff87bcb25b696680ea1cd52c6134bb4178950b Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 6 May 2024 11:50:46 -0500
Subject: [PATCH 14/37] fix comment

---
 .../prototype_mtc_extended/configs/school_escorting.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml
index ff04d214e..9ecc711f9 100644
--- a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml
+++ b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml
@@ -1,5 +1,5 @@
 # Some data values in the spec file will refer to missing values stored
-# as NaN in the data. This requires the `sharrow_fastmath` setting to
+# as NaN in the data. This requires the `fastmath` setting to
 # be set to `false` to avoid errors in the sharrow implementation. 
compute_settings: fastmath: false From 26aef51da4209247619b501b3c3ad3c9a268ead6 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 6 May 2024 17:25:52 -0500 Subject: [PATCH 15/37] debug helper values --- activitysim/core/interaction_simulate.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index a97a806ed..83cedd4f5 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -600,6 +600,14 @@ def to_series(x): ) print("problem expressions:\n", "\n".join(problem_cols)) + MISMATCH_sharrow = re_sh_flow_load_[ + :, + ~spec.index.get_level_values(0).str.startswith("_"), + ][:, problem_col_indexes] + MISMATCH_legacy = retrace_eval_data_.iloc[ + :, problem_col_indexes + ] + raise # enter debugger now to see what's up timelogger.mark("sharrow interact test", True, logger, trace_label) From 16fc8e2f0172b530bc375b98d4c0f81cf968ec4c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 6 May 2024 17:26:05 -0500 Subject: [PATCH 16/37] dtype compute fixes --- .../joint_tour_frequency_composition.csv | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test/joint_tours/configs/joint_tour_frequency_composition.csv b/test/joint_tours/configs/joint_tour_frequency_composition.csv index 2cace7b25..aa63d0be2 100644 --- a/test/joint_tours/configs/joint_tour_frequency_composition.csv +++ b/test/joint_tours/configs/joint_tour_frequency_composition.csv @@ -47,16 +47,16 @@ Label,Description,Expression,Coefficient ,Shopping HOV accessibility for 2 Tours,((autosnum_workers)*shop_hov_oversufficient_accessibility)*(num_joint_tours==2)*shopping,coef_shopping_hov_accessibility_for_2_tours ,Maintenance HOV Accessibility,((autosnum_workers)*maint_hov_oversufficient_accessibility)*othmaint,coef_maintenance_hov_accessibility ,Discretionary HOV Accessibility,((autosnum_workers)*discr_hov_oversufficient_accessibility)*othdiscr,coef_discretionary_hov_accessibility -,Constant for Children Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_constant_for_children_party_shopping_tour -,Constant for Children Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==2)+(df.purpose2==6)*(df.party2==2),coef_constant_for_children_party_maintenance_tour -,Constant for Children Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==2)+(df.purpose2==7)*(df.party2==2),coef_constant_for_children_party_eating_out_tour -,Constant for Children Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==2)+(df.purpose2==8)*(df.party2==2),coef_constant_for_children_party_visiting_tour -,Constant for Children Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==2)+(df.purpose2==9)*(df.party2==2),coef_constant_for_children_party_discretionary_tour -,Constant for Mixed Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_constant_for_mixed_party_shopping_tour -,Constant for Mixed Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==3)+(df.purpose2==6)*(df.party2==3),coef_constant_for_mixed_party_maintenance_tour -,Constant for Mixed Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==3)+(df.purpose2==7)*(df.party2==3),coef_constant_for_mixed_party_eating_out_tour -,Constant for Mixed Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==3)+(df.purpose2==8)*(df.party2==3),coef_constant_for_mixed_party_visiting_tour -,Constant for Mixed Party/ Discretionary 
Tour,@(df.purpose1==9)*(df.party1==3)+(df.purpose2==9)*(df.party2==3),coef_constant_for_mixed_party_discretionary_tour
+,Constant for Children Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==2)).astype(int)+((df.purpose2==5)*(df.party2==2)).astype(int),coef_constant_for_children_party_shopping_tour
+,Constant for Children Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==2)).astype(int)+((df.purpose2==6)*(df.party2==2)).astype(int),coef_constant_for_children_party_maintenance_tour
+,Constant for Children Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==2)).astype(int)+((df.purpose2==7)*(df.party2==2)).astype(int),coef_constant_for_children_party_eating_out_tour
+,Constant for Children Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==2)).astype(int)+((df.purpose2==8)*(df.party2==2)).astype(int),coef_constant_for_children_party_visiting_tour
+,Constant for Children Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==2)).astype(int)+((df.purpose2==9)*(df.party2==2)).astype(int),coef_constant_for_children_party_discretionary_tour
+,Constant for Mixed Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==3)).astype(int)+((df.purpose2==5)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_shopping_tour
+,Constant for Mixed Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==3)).astype(int)+((df.purpose2==6)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_maintenance_tour
+,Constant for Mixed Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==3)).astype(int)+((df.purpose2==7)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_eating_out_tour
+,Constant for Mixed Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==3)).astype(int)+((df.purpose2==8)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_visiting_tour
+,Constant for Mixed Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==3)).astype(int)+((df.purpose2==9)*(df.party2==3)).astype(int),coef_constant_for_mixed_party_discretionary_tour
 ,Number of Active Full time workers /Adult Party,num_travel_active_full_time_workers * (party1==1) + num_travel_active_full_time_workers * (party2==1),coef_number_of_active_full_time_workers_adult_party
 ,Number of Active Part time workers /Adult Party,num_travel_active_part_time_workers * (party1==1) + num_travel_active_part_time_workers * (party2==1),coef_number_of_active_part_time_workers_adult_party
 ,Number of Active University Students /Adult Party,num_travel_active_university_students * (party1==1) + num_travel_active_university_students * (party2==1),coef_number_of_active_university_students_adult_party
@@ -78,16 +78,16 @@ Label,Description,Expression,Coefficient
 ,Not more than 1 travel active adult in HH,@(df.num_travel_active_adults < 2)*(((df.party1==1)+(df.party2==1))>0),coef_unavailable
 ,Not more than 1 travel active child in HH,@(df.num_travel_active_children < 2)*(((df.party1==2)+(df.party2==2))>0),coef_unavailable
 ,No travel-active pair adult-child in HH ,@((df.num_travel_active_adults*df.num_travel_active_children) ==0)*(((df.party1==3)+(df.party2==3))>0),coef_unavailable
-,Adjustment for Children Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_adjustment_for_children_party_shopping_tour
-,Adjustment for Children Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==2)+(df.purpose2==6)*(df.party2==2),coef_adjustment_for_children_party_maintenance_tour
-,Adjustment for Children Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==2)+(df.purpose2==7)*(df.party2==2),coef_adjustment_for_children_party_eating_out_tour
-,Adjustment for Children Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==2)+(df.purpose2==8)*(df.party2==2),coef_adjustment_for_children_party_visiting_tour
-,Adjustment for Children Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==2)+(df.purpose2==9)*(df.party2==2),coef_adjustment_for_children_party_discretionary_tour
-,Adjustment for Mixed Party/ Shopping Tour,@(df.purpose1==5)*(df.party1==2)+(df.purpose2==5)*(df.party2==2),coef_adjustment_for_mixed_party_shopping_tour
-,Adjustment for Mixed Party/ Maintenance Tour,@(df.purpose1==6)*(df.party1==3)+(df.purpose2==6)*(df.party2==3),coef_adjustment_for_mixed_party_maintenance_tour
-,Adjustment for Mixed Party/ Eating Out Tour,@(df.purpose1==7)*(df.party1==3)+(df.purpose2==7)*(df.party2==3),coef_adjustment_for_mixed_party_eating_out_tour
-,Adjustment for Mixed Party/ Visiting Tour,@(df.purpose1==8)*(df.party1==3)+(df.purpose2==8)*(df.party2==3),coef_adjustment_for_mixed_party_visiting_tour
-,Adjustment for Mixed Party/ Discretionary Tour,@(df.purpose1==9)*(df.party1==3)+(df.purpose2==9)*(df.party2==3),coef_adjustment_for_mixed_party_discretionary_tour
+,Adjustment for Children Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==2)).astype(int)+((df.purpose2==5)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_shopping_tour
+,Adjustment for Children Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==2)).astype(int)+((df.purpose2==6)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_maintenance_tour
+,Adjustment for Children Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==2)).astype(int)+((df.purpose2==7)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_eating_out_tour
+,Adjustment for Children Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==2)).astype(int)+((df.purpose2==8)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_visiting_tour
+,Adjustment for Children Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==2)).astype(int)+((df.purpose2==9)*(df.party2==2)).astype(int),coef_adjustment_for_children_party_discretionary_tour
+,Adjustment for Mixed Party/ Shopping Tour,@((df.purpose1==5)*(df.party1==3)).astype(int)+((df.purpose2==5)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_shopping_tour
+,Adjustment for Mixed Party/ Maintenance Tour,@((df.purpose1==6)*(df.party1==3)).astype(int)+((df.purpose2==6)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_maintenance_tour
+,Adjustment for Mixed Party/ Eating Out Tour,@((df.purpose1==7)*(df.party1==3)).astype(int)+((df.purpose2==7)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_eating_out_tour
+,Adjustment for Mixed Party/ Visiting Tour,@((df.purpose1==8)*(df.party1==3)).astype(int)+((df.purpose2==8)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_visiting_tour
+,Adjustment for Mixed Party/ Discretionary Tour,@((df.purpose1==9)*(df.party1==3)).astype(int)+((df.purpose2==9)*(df.party2==3)).astype(int),coef_adjustment_for_mixed_party_discretionary_tour
 ,Adjustment for shopping tour,shopping,coef_adjustment_for_shopping_tour
 ,Adjustment for Maintenance tour,othmaint,coef_adjustment_for_maintenance_tour
 ,Adjustment for eating out tour,eatout,coef_adjustment_for_eating_out_tour

From 75d1a1d5ea398f8ad198fe5276f08623f2bc4b34 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 7 May 2024 12:01:24 -0500
Subject: [PATCH 17/37] land_use_columns_orig

---
 activitysim/abm/models/accessibility.py | 31 ++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
index f82a34f67..4d98b0e86 100644
--- a/activitysim/abm/models/accessibility.py
+++ b/activitysim/abm/models/accessibility.py
@@ -23,12 +23,27 @@ class AccessibilitySettings(PydanticReadable):
     CONSTANTS: dict[str, Any] = {}

     land_use_columns: list[str] = []
-    """Only include the these columns in the computational tables
+    """Only include these columns in the computational tables.
+
+    This setting joins land use columns to the accessibility destinations.

     Memory usage is reduced by only listing the minimum columns needed by
     the SPEC, and nothing extra.
     """

+    land_use_columns_orig: list[str] = []
+    """Join these land use columns to the origin zones.
+
+    This setting joins land use columns to the accessibility origins.
+    To disambiguate from the destination land use columns, the names of the
+    columns added will be prepended with 'landuse_orig_'.
+
+    Memory usage is reduced by only listing the minimum columns needed by
+    the SPEC, and nothing extra.
+
+    .. versionadded:: 1.3
+    """
+
     SPEC: str = "accessibility.csv"
     """Filename for the accessibility specification (csv) file."""

@@ -55,6 +70,7 @@ def compute_accessibilities_for_zones(
     state: workflow.State,
     accessibility_df: pd.DataFrame,
     land_use_df: pd.DataFrame,
+    orig_land_use_df: pd.DataFrame | None,
     assignment_spec: dict,
     constants: dict,
     network_los: los.Network_LOS,
@@ -69,6 +85,7 @@ def compute_accessibilities_for_zones(
     state : workflow.State
     accessibility_df : pd.DataFrame
     land_use_df : pd.DataFrame
+    orig_land_use_df : pd.DataFrame | None
     assignment_spec : dict
     constants : dict
     network_los : los.Network_LOS
@@ -101,6 +118,12 @@ def compute_accessibilities_for_zones(
     logger.debug(f"{trace_label}: tiling land_use_columns into od_data")
     for c in land_use_df.columns:
         od_data[c] = np.tile(land_use_df[c].to_numpy(), orig_zone_count)
+    if orig_land_use_df is not None:
+        logger.debug(f"{trace_label}: repeating orig_land_use_columns into od_data")
+        for c in orig_land_use_df:
+            od_data[f"landuse_orig_{c}"] = np.repeat(
+                orig_land_use_df[c], dest_zone_count
+            )
     logger.debug(f"{trace_label}: converting od_data to DataFrame")
     od_df = pd.DataFrame(od_data)
     logger.debug(f"{trace_label}: dropping od_data")
@@ -233,6 +256,11 @@ def compute_accessibility(
         land_use_df = land_use
     land_use_df = land_use_df[land_use_columns]

+    if model_settings.land_use_columns_orig:
+        orig_land_use_df = land_use[model_settings.land_use_columns_orig]
+    else:
+        orig_land_use_df = None
+
     logger.info(
         f"Running {trace_label} with {len(accessibility_df.index)} orig zones "
         f"{len(land_use_df)} dest zones"
@@ -253,6 +281,7 @@ def compute_accessibility(
             state,
             chooser_chunk,
             land_use_df,
+            orig_land_use_df,
             assignment_spec,
             constants,
             network_los,

From 11a01a700b00522b7c9e45c81bd6b336b3db84db Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 7 May 2024 14:08:31 -0500
Subject: [PATCH 18/37] fix and test orig_land_use with explicit chunking

---
 activitysim/abm/models/accessibility.py        |  3 +-
 activitysim/abm/test/test_agg_accessibility.py | 45 +++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
index 4d98b0e86..7dc7abe2f 100644
--- a/activitysim/abm/models/accessibility.py
+++ b/activitysim/abm/models/accessibility.py
@@ -277,11 +277,12 
@@ def compute_accessibility( ) in chunk.adaptive_chunked_choosers( state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size ): + orig_land_use_df_chunk = orig_land_use_df.loc[chooser_chunk.index] accessibilities = compute_accessibilities_for_zones( state, chooser_chunk, land_use_df, - orig_land_use_df, + orig_land_use_df_chunk, assignment_spec, constants, network_los, diff --git a/activitysim/abm/test/test_agg_accessibility.py b/activitysim/abm/test/test_agg_accessibility.py index 4015e35cb..2c00252dc 100644 --- a/activitysim/abm/test/test_agg_accessibility.py +++ b/activitysim/abm/test/test_agg_accessibility.py @@ -61,3 +61,48 @@ def test_agg_accessibility_explicit_chunking(state, dataframe_regression): ) df = state.get_dataframe("accessibility") dataframe_regression.check(df, basename="simple_agg_accessibility") + + +@pytest.mark.parametrize("explicit_chunk", [0, 5]) +def test_agg_accessibility_orig_land_use( + state, dataframe_regression, tmp_path, explicit_chunk +): + # set top level settings + state.settings.chunk_size = 0 + state.settings.sharrow = False + state.settings.chunk_training_mode = "explicit" + + # read the accessibility settings and override the explicit chunk size to 5 + model_settings = AccessibilitySettings.read_settings_file( + state.filesystem, "accessibility.yaml" + ) + model_settings.explicit_chunk = explicit_chunk + model_settings.land_use_columns = ["RETEMPN", "TOTEMP", "TOTACRE"] + model_settings.land_use_columns_orig = ["TOTACRE"] + + land_use = state.get_dataframe("land_use") + accessibility = state.get_dataframe("accessibility") + + tmp_spec = tmp_path / "tmp-accessibility.csv" + tmp_spec.open("w").write( + """Description,Target,Expression +orig_acreage,orig_acreage,df.landuse_orig_TOTACRE +dest_acreage,dest_acreage,df.TOTACRE +""" + ) + model_settings.SPEC = str(tmp_spec) + + # state.filesystem.get_config_file_path(model_settings.SPEC) + + compute_accessibility( + state, + land_use, + accessibility, + state.get("network_los"), + model_settings, + model_settings_file_name="accessibility.yaml", + trace_label="compute_accessibility", + output_table_name="accessibility", + ) + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility_orig_land_use") From 5eb076a94be47a5badd7ce736f05a6e636e1fb2e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 7 May 2024 14:20:47 -0500 Subject: [PATCH 19/37] repair --- activitysim/abm/models/accessibility.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index 7dc7abe2f..633c8e99b 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -277,7 +277,10 @@ def compute_accessibility( ) in chunk.adaptive_chunked_choosers( state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size ): - orig_land_use_df_chunk = orig_land_use_df.loc[chooser_chunk.index] + if orig_land_use_df is not None: + orig_land_use_df_chunk = orig_land_use_df.loc[chooser_chunk.index] + else: + orig_land_use_df_chunk = None accessibilities = compute_accessibilities_for_zones( state, chooser_chunk, From 0c7221aece2321ae1f0f1750710081267234ca15 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 7 May 2024 14:27:45 -0500 Subject: [PATCH 20/37] add missing test result file --- ...simple_agg_accessibility_orig_land_use.csv | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 
activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv diff --git a/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv new file mode 100644 index 000000000..6e269bec4 --- /dev/null +++ b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility_orig_land_use.csv @@ -0,0 +1,26 @@ +zone_id,orig_acreage,dest_acreage +0,6.2314652154886145,7.3737508868303339 +1,6.657368991274053,7.3737508868303339 +2,5.909440711629391,7.3737508868303339 +3,6.1810513148933497,7.3737508868303339 +4,7.1842500057933423,7.3737508868303339 +5,6.5875500148247959,7.3737508868303339 +6,7.026426808699636,7.3737508868303339 +7,7.1514854639047352,7.3737508868303339 +8,7.9377317752601089,7.3737508868303339 +9,7.5167053007413269,7.3737508868303339 +10,7.6138186848086287,7.3737508868303339 +11,7.1955623436220684,7.3737508868303339 +12,6.4975288537722626,7.3737508868303339 +13,6.6411821697405919,7.3737508868303339 +14,6.5701824369168911,7.3737508868303339 +15,8.034631032923107,7.3737508868303339 +16,8.2449906898128429,7.3737508868303339 +17,7.8948771916168834,7.3737508868303339 +18,8.0507033814702993,7.3737508868303339 +19,7.8073066868519945,7.3737508868303339 +20,7.5875638951029023,7.3737508868303339 +21,7.6932537206062692,7.3737508868303339 +22,7.7279755421055585,7.3737508868303339 +23,6.8834625864130921,7.3737508868303339 +24,6.2653012127377101,7.3737508868303339 From b7b72441f9276db8c4b45628a565690f5015d300 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 7 May 2024 21:49:04 -0500 Subject: [PATCH 21/37] omx_ignore_patterns --- activitysim/core/configuration/top.py | 12 ++++++++++++ activitysim/core/skim_dataset.py | 1 + 2 files changed, 13 insertions(+) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 0b6121037..d8d2e7adc 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -585,6 +585,18 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): compatible with using :py:attr:`Settings.sharrow`. """ + omx_ignore_patterns: list[str] = [] + """ + List of regex patterns to ignore when reading OMX files. + + This is useful if you have tables in your OMX file that you don't want to + read in. For example, if you have both time-of-day values and time-independent + values (e.g., "BIKE_TIME" and "BIKE_TIME__AM"), you can ignore the time-of-day + values by setting this to ["BIKE_TIME__.+"]. + + .. 
versionadded:: 1.3 + """ + keep_mem_logs: bool = False pipeline_complib: str = "NOTSET" diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index 8421cb6c7..772548a24 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -777,6 +777,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: ), time_periods=time_periods, max_float_precision=max_float_precision, + ignore=state.settings.omx_ignore_patterns, ) if zarr_file: From 17826edb7741787d4e5a0d28f7712c3b39170133 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Wed, 8 May 2024 17:55:23 -0700 Subject: [PATCH 22/37] revert change to drop size terms --- activitysim/abm/models/trip_destination.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 8a3320a20..57318732a 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -1336,6 +1336,8 @@ def run_trip_destination( # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index size_term_matrix = DataFrameMatrix(alternatives) + # don't need size terms in alternatives, just zone_id index + alternatives = alternatives.drop(alternatives.columns, axis=1) alternatives.index.name = model_settings.ALT_DEST_COL_NAME sample_list = [] From 077b8a17bc1c14b7a78e41644d59180c388f3e69 Mon Sep 17 00:00:00 2001 From: David Hensle Date: Thu, 9 May 2024 09:09:55 -0700 Subject: [PATCH 23/37] creating separate sample and simulate preprocessors --- activitysim/abm/models/trip_destination.py | 31 ++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 57318732a..86c8f657d 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -60,7 +60,8 @@ class TripDestinationSettings(LocationComponentSettings, extra="forbid"): PRIMARY_DEST: str = "tour_leg_dest" # must be created in preprocessor REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS: list[str] | None = None preprocessor: PreprocessorSettings | None = None - alts_preprocessor: PreprocessorSettings | None = None + alts_preprocessor_sample: PreprocessorSettings | None = None + alts_preprocessor_simulate: PreprocessorSettings | None = None CLEANUP: bool fail_some_trips_for_testing: bool = False """This setting is used by testing code to force failed trip_destination.""" @@ -202,6 +203,15 @@ def _destination_sample( log_alt_losers = state.settings.log_alt_losers + if model_settings.alts_preprocessor_sample: + expressions.assign_columns( + state, + df=alternatives, + model_settings=model_settings.alts_preprocessor_sample, + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "alts"), + ) + choices = interaction_sample( state, choosers=trips, @@ -936,6 +946,15 @@ def trip_destination_simulate( ) locals_dict.update(skims) + if model_settings.alts_preprocessor_simulate: + expressions.assign_columns( + state, + df=destination_sample, + model_settings=model_settings.alts_preprocessor_simulate, + locals_dict=locals_dict, + trace_label=tracing.extend_trace_label(trace_label, "alts"), + ) + log_alt_losers = state.settings.log_alt_losers destinations = interaction_sample_simulate( state, @@ -1246,7 +1265,6 @@ def run_trip_destination( state.filesystem, model_settings_file_name ) preprocessor_settings = model_settings.preprocessor 
- alts_preprocessor_settings = model_settings.alts_preprocessor logsum_settings = state.filesystem.read_model_settings( model_settings.LOGSUM_SETTINGS ) @@ -1371,15 +1389,6 @@ def run_trip_destination( trace_label=nth_trace_label, ) - if alts_preprocessor_settings: - expressions.assign_columns( - state, - df=alternatives, - model_settings=alts_preprocessor_settings, - locals_dict=locals_dict, - trace_label=tracing.extend_trace_label(nth_trace_label, "alts"), - ) - if isinstance( nth_trips["trip_period"].dtype, pd.api.types.CategoricalDtype ): From b911f1fad701c6d0095e9ac07fc4de1610b212c8 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 13 May 2024 16:01:03 -0500 Subject: [PATCH 24/37] bugfix --- activitysim/core/interaction_simulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 83cedd4f5..a7e037524 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -425,7 +425,7 @@ def to_series(x): dtype=np.float32, ) logger.info("finish sh_flow load dataarray") - sh_utility_fat = sh_utility_fat[trace_rows, :] + # sh_utility_fat = sh_utility_fat[trace_rows, :] # trace selection above, do not repeat sh_utility_fat = sh_utility_fat.to_dataframe("vals") try: sh_utility_fat = sh_utility_fat.unstack("expressions") From 3cabeda2482a3788c37f0f7a3767ce8322420e62 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 13 May 2024 16:01:43 -0500 Subject: [PATCH 25/37] skim_dataset loading without dask --- activitysim/core/skim_dataset.py | 40 ++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index 772548a24..016038de8 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -768,17 +768,24 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: if d is None: if zarr_file and not do_not_save_zarr: logger.info("did not find zarr skims, loading omx") - d = sh.dataset.from_omx_3d( - [openmatrix.open_file(f, mode="r") for f in omx_file_paths], - index_names=( - ("otap", "dtap", "time_period") - if skim_tag == "tap" - else ("otaz", "dtaz", "time_period") - ), - time_periods=time_periods, - max_float_precision=max_float_precision, - ignore=state.settings.omx_ignore_patterns, - ) + omx_file_handles = [ + openmatrix.open_file(f, mode="r") for f in omx_file_paths + ] + try: + d = sh.dataset.from_omx_3d( + omx_file_handles, + index_names=( + ("otap", "dtap", "time_period") + if skim_tag == "tap" + else ("otaz", "dtaz", "time_period") + ), + time_periods=time_periods, + max_float_precision=max_float_precision, + ignore=state.settings.omx_ignore_patterns, + ) + finally: + for f in omx_file_handles: + f.close() if zarr_file: try: @@ -870,7 +877,16 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: return d else: logger.info("writing skims to shared memory") - return d.shm.to_shared_memory(backing, mode="r") + # setting `load` to false then calling `reload_from_omx_3d` avoids + # using dask to load the data into memory, which is not performant + # on Windows for large datasets. 
From 0016ea5303eeac1ab2104d30d10758369e2a8ad0 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 13 May 2024 16:37:53 -0500
Subject: [PATCH 26/37] require sharrow 2.9

---
 .github/workflows/core_tests.yml            | 12 ++++++------
 conda-environments/activitysim-dev.yml      |  2 +-
 conda-environments/docbuild.yml             |  2 +-
 conda-environments/github-actions-tests.yml |  2 +-
 pyproject.toml                              |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml
index 1afe9270a..e5c7cf79f 100644
--- a/.github/workflows/core_tests.yml
+++ b/.github/workflows/core_tests.yml
@@ -55,7 +55,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
@@ -155,7 +155,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
@@ -252,7 +252,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
@@ -349,7 +349,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
@@ -416,7 +416,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
@@ -482,7 +482,7 @@ jobs:
             "pytest-cov" \
             "pytest-regressions=2.5.0" \
             "scikit-learn=1.2.2" \
-            "sharrow>=2.6.0" \
+            "sharrow>=2.9.0" \
             "simwrapper=1.8.5" \
             "xarray=2023.2.0" \
             "zarr=2.14.2" \
diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml
index 33a5856fb..f26219916 100644
--- a/conda-environments/activitysim-dev.yml
+++ b/conda-environments/activitysim-dev.yml
@@ -61,7 +61,7 @@ dependencies:
 - ruff
 - setuptools_scm
 - scikit-learn = 1.2.*
-- sharrow >= 2.6.0
+- sharrow >= 2.9.0
 - simwrapper > 1.7
 - snakeviz  # for profiling
 - sparse
diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml
index 717e023a1..be1935f23 100644
--- a/conda-environments/docbuild.yml
+++ b/conda-environments/docbuild.yml
@@ -46,7 +46,7 @@ dependencies:
 - pyyaml >= 5.1
 - requests >= 2.7
 - scikit-learn >= 1.1
-- sharrow >= 2.6.0
+- sharrow >= 2.9.0
 - simwrapper > 1.7
 - sparse
 - sphinx-argparse
diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml
index 3aa8ae115..5051b981c 100644
--- a/conda-environments/github-actions-tests.yml
+++ b/conda-environments/github-actions-tests.yml
@@ -32,7 +32,7 @@ dependencies:
 - requests = 2.28.*
 - ruff
 - scikit-learn = 1.2.*
-- sharrow >= 2.6.0
+- sharrow >= 2.9.0
 - simwrapper > 1.7
 - sparse
 - xarray = 2023.2.*
diff --git a/pyproject.toml b/pyproject.toml
index 36d4e2146..890708041 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
     "pyyaml >= 5.1",
     "requests >= 2.7",
     "scikit-learn >= 1.1",
-    "sharrow >= 2.6",
+    "sharrow >= 2.9",
     "simwrapper > 1.7",
     "sparse",
     "tables >= 3.5.1",

From e630edfc25c9d071a036a6bca05c8f1bf29364fe Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Mon, 13 May 2024 21:35:24 -0500
Subject: [PATCH 27/37] wait to close open files

---
 activitysim/core/skim_dataset.py | 33 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py
index 016038de8..f53d93dd1 100644
--- a/activitysim/core/skim_dataset.py
+++ b/activitysim/core/skim_dataset.py
@@ -714,6 +714,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
     omx_file_paths = state.filesystem.expand_input_file_list(
         network_los_preload.omx_file_names(skim_tag),
     )
+    omx_file_handles = []
     zarr_file = network_los_preload.zarr_file_name(skim_tag)

     if state.settings.disable_zarr:
@@ -771,21 +772,17 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
             logger.info("did not find zarr skims, loading omx")
         omx_file_handles = [
             openmatrix.open_file(f, mode="r") for f in omx_file_paths
         ]
-        try:
-            d = sh.dataset.from_omx_3d(
-                omx_file_handles,
-                index_names=(
-                    ("otap", "dtap", "time_period")
-                    if skim_tag == "tap"
-                    else ("otaz", "dtaz", "time_period")
-                ),
-                time_periods=time_periods,
-                max_float_precision=max_float_precision,
-                ignore=state.settings.omx_ignore_patterns,
-            )
-        finally:
-            for f in omx_file_handles:
-                f.close()
+        d = sh.dataset.from_omx_3d(
+            omx_file_handles,
+            index_names=(
+                ("otap", "dtap", "time_period")
+                if skim_tag == "tap"
+                else ("otaz", "dtaz", "time_period")
+            ),
+            time_periods=time_periods,
+            max_float_precision=max_float_precision,
+            ignore=state.settings.omx_ignore_patterns,
+        )

     if zarr_file:
         try:
@@ -874,6 +871,8 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
     np.testing.assert_array_equal(land_use.index, d.dtaz)

     if d.shm.is_shared_memory:
+        for f in omx_file_handles:
+            f.close()
         return d
     else:
         logger.info("writing skims to shared memory")
@@ -883,9 +882,11 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
         d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=False)
         sh.dataset.reload_from_omx_3d(
             d_shared_mem,
-            omx_file_paths,
+            [str(i) for i in omx_file_paths],
             ignore=state.settings.omx_ignore_patterns,
         )
+        for f in omx_file_handles:
+            f.close()
         return d_shared_mem
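
[Editor's aside — illustrative example, not part of the patch series.] PATCHes 26 and 28 raise the sharrow floor to 2.9/2.9.1 because the loading path above leans on newer sharrow behavior (from_omx_3d's ignore= handling and reload_from_omx_3d). A defensive guard of the kind a downstream model script might add — the minimum version comes from the patches, the guard itself is hypothetical (sh.__version__ is used by PATCH 34 later in this series, so that attribute is known to exist):

    import sharrow as sh
    from packaging.version import Version

    if Version(sh.__version__) < Version("2.9.1"):
        raise RuntimeError(f"sharrow >= 2.9.1 required, found {sh.__version__}")
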
From 2607029ae66202c0a0155eab53681ab6fc8e124c Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 14 May 2024 08:14:06 -0500
Subject: [PATCH 28/37] require sharrow 2.9.1

---
 .github/workflows/core_tests.yml            | 78 ---------------------
 conda-environments/activitysim-dev.yml      |  2 +-
 conda-environments/docbuild.yml             |  2 +-
 conda-environments/github-actions-tests.yml |  2 +-
 pyproject.toml                              |  2 +-
 5 files changed, 4 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml
index e5c7cf79f..09271caf8 100644
--- a/.github/workflows/core_tests.yml
+++ b/.github/workflows/core_tests.yml
@@ -47,19 +47,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install activitysim
@@ -147,19 +134,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install activitysim
@@ -244,19 +218,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install activitysim
@@ -341,19 +302,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install activitysim
@@ -408,19 +356,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install activitysim
@@ -474,19 +409,6 @@ jobs:
       - name: Update environment
         run: |
           mamba env update -n asim-test -f conda-environments/github-actions-tests.yml
-          mamba install --yes \
-            "psutil=5.9.5" \
-            "pydantic=2.6.1" \
-            "pypyr=5.8.0" \
-            "pytables=3.6.1" \
-            "pytest-cov" \
-            "pytest-regressions=2.5.0" \
-            "scikit-learn=1.2.2" \
-            "sharrow>=2.9.0" \
-            "simwrapper=1.8.5" \
-            "xarray=2023.2.0" \
-            "zarr=2.14.2" \
-            "zstandard=0.21.0"
         if: steps.cache.outputs.cache-hit != 'true'

       - name: Install Larch
diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml
index f26219916..eaf5b7277 100644
--- a/conda-environments/activitysim-dev.yml
+++ b/conda-environments/activitysim-dev.yml
@@ -61,7 +61,7 @@ dependencies:
 - ruff
 - setuptools_scm
 - scikit-learn = 1.2.*
-- sharrow >= 2.9.0
+- sharrow >= 2.9.1
 - simwrapper > 1.7
 - snakeviz  # for profiling
 - sparse
diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml
index be1935f23..16a477a43 100644
--- a/conda-environments/docbuild.yml
+++ b/conda-environments/docbuild.yml
@@ -46,7 +46,7 @@ dependencies:
 - pyyaml >= 5.1
 - requests >= 2.7
 - scikit-learn >= 1.1
-- sharrow >= 2.9.0
+- sharrow >= 2.9.1
 - simwrapper > 1.7
 - sparse
 - sphinx-argparse
diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml
index 5051b981c..d26edd4bc 100644
--- a/conda-environments/github-actions-tests.yml
+++ b/conda-environments/github-actions-tests.yml
@@ -32,7 +32,7 @@ dependencies:
 - requests = 2.28.*
 - ruff
 - scikit-learn = 1.2.*
-- sharrow >= 2.9.0
+- sharrow >= 2.9.1
 - simwrapper > 1.7
 - sparse
 - xarray = 2023.2.*
diff --git a/pyproject.toml b/pyproject.toml
index 890708041..c426d2010 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
     "pyyaml >= 5.1",
     "requests >= 2.7",
     "scikit-learn >= 1.1",
-    "sharrow >= 2.9",
+    "sharrow >= 2.9.1",
     "simwrapper > 1.7",
     "sparse",
     "tables >= 3.5.1",
"pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.9.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -244,19 +218,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.9.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -341,19 +302,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.9.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -408,19 +356,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.9.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install activitysim @@ -474,19 +409,6 @@ jobs: - name: Update environment run: | mamba env update -n asim-test -f conda-environments/github-actions-tests.yml - mamba install --yes \ - "psutil=5.9.5" \ - "pydantic=2.6.1" \ - "pypyr=5.8.0" \ - "pytables=3.6.1" \ - "pytest-cov" \ - "pytest-regressions=2.5.0" \ - "scikit-learn=1.2.2" \ - "sharrow>=2.9.0" \ - "simwrapper=1.8.5" \ - "xarray=2023.2.0" \ - "zarr=2.14.2" \ - "zstandard=0.21.0" if: steps.cache.outputs.cache-hit != 'true' - name: Install Larch diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index f26219916..eaf5b7277 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -61,7 +61,7 @@ dependencies: - ruff - setuptools_scm - scikit-learn = 1.2.* -- sharrow >= 2.9.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - snakeviz # for profiling - sparse diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index be1935f23..16a477a43 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -46,7 +46,7 @@ dependencies: - pyyaml >= 5.1 - requests >= 2.7 - scikit-learn >= 1.1 -- sharrow >= 2.9.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - sparse - sphinx-argparse diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index 5051b981c..d26edd4bc 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -32,7 +32,7 @@ dependencies: - requests = 2.28.* - ruff - scikit-learn = 1.2.* -- sharrow >= 2.9.0 +- sharrow >= 2.9.1 - simwrapper > 1.7 - sparse - xarray = 2023.2.* diff --git a/pyproject.toml b/pyproject.toml index 890708041..c426d2010 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ 
From 79a1a0a71fdad7e0b771d04efabdacd23834c42e Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 21 May 2024 16:43:50 -0500
Subject: [PATCH 30/37] decode time periods

---
 activitysim/core/steps/output.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index 97f50a57f..b9d7cc13d 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -370,6 +370,18 @@ def write_tables(state: workflow.State) -> None:
                     decode_instruction = decode_instruction.strip()
                 else:
                     decode_filter = None
+
+                if decode_instruction == "time_period":
+                    map_col = list(state.network_settings.skim_time_periods.labels)
+                    map_func = map_col.__getitem__
+                    revised_col = (
+                        pd.Series(dt.column(colname)).astype(int).map(map_func)
+                    )
+                    dt = dt.drop([colname]).append_column(
+                        colname, pa.array(revised_col)
+                    )
+                    continue
+
                 if "." not in decode_instruction:
                     lookup_col = decode_instruction
                     source_table = table_name
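
[Editor's aside — illustrative example, not part of the patch series.] The decode in PATCH 30 is a positional lookup: stored time periods are small integer codes, and skim_time_periods.labels supplies the label at each position. A toy version with made-up labels:

    import pandas as pd

    labels = ["EA", "AM", "MD", "PM", "EV"]  # illustrative skim_time_periods.labels
    codes = pd.Series([1, 3, 3, 0])          # integer codes as stored in the table
    decoded = codes.astype(int).map(labels.__getitem__)
    assert list(decoded) == ["AM", "PM", "PM", "EA"]
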
From 0bfa91535bd23d942d6595f43b6b4eab7e78b31b Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 21 May 2024 16:44:08 -0500
Subject: [PATCH 31/37] use original tazs where possible

---
 activitysim/abm/tables/landuse.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/activitysim/abm/tables/landuse.py b/activitysim/abm/tables/landuse.py
index ed7acaf3b..1668e7b35 100644
--- a/activitysim/abm/tables/landuse.py
+++ b/activitysim/abm/tables/landuse.py
@@ -66,7 +66,11 @@ def land_use_taz(state: workflow.State):
             "no land_use_taz defined in input_table_list, constructing "
             "from discovered TAZ values in land_use"
         )
-        unique_tazs = np.unique(land_use["TAZ"])
+        # use original TAZ values if available, otherwise use current TAZ values
+        if state.settings.recode_pipeline_columns and "_original_TAZ" in land_use:
+            unique_tazs = np.unique(land_use["_original_TAZ"])
+        else:
+            unique_tazs = np.unique(land_use["TAZ"])
         if state.settings.recode_pipeline_columns:
             df = pd.Series(
                 unique_tazs,

From a551bfa5a55ed80a2a86d086bb51f9f3e1f61a93 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Wed, 22 May 2024 10:11:02 -0500
Subject: [PATCH 32/37] update numba in envs to 0.57

---
 conda-environments/activitysim-dev-base.yml | 4 ++--
 conda-environments/activitysim-dev.yml      | 2 +-
 conda-environments/github-actions-tests.yml | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml
index 4b6d5f184..f7aa5c735 100644
--- a/conda-environments/activitysim-dev-base.yml
+++ b/conda-environments/activitysim-dev-base.yml
@@ -37,7 +37,7 @@ dependencies:
 - nbconvert
 - nbformat
 - nbmake
-- numba = 0.56.*
+- numba = 0.57.*
 - numexpr
 - numpy = 1.23.*
 - numpydoc
@@ -77,4 +77,4 @@ dependencies:
 - zstandard

 - pip:
-  - autodoc_pydantic
\ No newline at end of file
+  - autodoc_pydantic
diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml
index eaf5b7277..107b4b355 100644
--- a/conda-environments/activitysim-dev.yml
+++ b/conda-environments/activitysim-dev.yml
@@ -33,7 +33,7 @@ dependencies:
 - nbconvert
 - nbformat
 - nbmake
-- numba = 0.56.*
+- numba = 0.57.*
 - numexpr
 - numpy = 1.23.*
 - numpydoc
diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml
index d26edd4bc..692ee1192 100644
--- a/conda-environments/github-actions-tests.yml
+++ b/conda-environments/github-actions-tests.yml
@@ -13,7 +13,7 @@ dependencies:
 - dask = 2023.3.2
 - isort = 5.12.0
 - nbmake = 1.4.6
-- numba = 0.56.4
+- numba = 0.57.*
 - numpy = 1.23.5
 - openmatrix = 0.3.5.0
 - orca = 1.8

From d98f776af559038e1588c7f07d58df7151965cb3 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Thu, 23 May 2024 09:15:32 -0500
Subject: [PATCH 33/37] no fastmath in tour mode choice

---
 .../prototype_mtc_extended/configs/tour_mode_choice.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml b/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml
index 19f5014fc..6550c28e5 100644
--- a/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml
+++ b/activitysim/examples/prototype_mtc_extended/configs/tour_mode_choice.yaml
@@ -196,3 +196,6 @@ LOGSUM_CHOOSER_COLUMNS:

 MODE_CHOICE_LOGSUM_COLUMN_NAME: mode_choice_logsum

+
+compute_settings:
+  fastmath: false  # use of isnan in utility functions requires fastmath=False
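
[Editor's aside — illustrative example, not part of the patch series.] The comment in PATCH 33 is the whole story: fastmath licenses the compiler to assume no NaNs exist, so an isnan-based test in a compiled utility expression can be optimized away and silently give the wrong answer. A hedged numba sketch — whether the misbehavior actually manifests depends on the LLVM optimizations applied, so treat the output as platform-dependent:

    import numpy as np
    from numba import njit

    @njit(fastmath=True)
    def count_nans_fast(a):
        n = 0
        for x in a:
            if np.isnan(x):  # under fastmath this test may be folded to False
                n += 1
        return n

    # with fastmath=True this may wrongly print 0; with fastmath=False it prints 1
    print(count_nans_fast(np.array([1.0, np.nan, 2.0])))
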
From b465dd0e4dfb30995497b9cb69ffc258920a8042 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Sat, 1 Jun 2024 21:58:10 -0500
Subject: [PATCH 34/37] sharrow cache by version

---
 activitysim/core/configuration/filesystem.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/activitysim/core/configuration/filesystem.py b/activitysim/core/configuration/filesystem.py
index ce50becd4..27496b1c7 100644
--- a/activitysim/core/configuration/filesystem.py
+++ b/activitysim/core/configuration/filesystem.py
@@ -425,9 +425,12 @@ def persist_sharrow_cache(self) -> None:
         --------
         FileSystem.sharrow_cache_dir
         """
+        import sharrow as sh
+
+        sharrow_minor_version = ".".join(sh.__version__.split(".")[:2])
         self.sharrow_cache_dir = Path(
             platformdirs.user_cache_dir(appname="ActivitySim")
-        ).joinpath(f"numba-{numba.__version__}")
+        ).joinpath(f"sharrow-{sharrow_minor_version}-numba-{numba.__version__}")
         self.sharrow_cache_dir.mkdir(parents=True, exist_ok=True)

     def _cascading_input_file_path(

From fe13e93c98d8102eb56168e71d4e057bde99d040 Mon Sep 17 00:00:00 2001
From: David Hensle <51132108+dhensle@users.noreply.github.com>
Date: Wed, 19 Jun 2024 08:55:38 -0700
Subject: [PATCH 35/37] include sharrow setting in log by default

---
 activitysim/core/configuration/top.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index d8d2e7adc..450827f56 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -626,6 +626,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True):
         "trace_hh_id",
         "memory_profile",
         "instrument",
+        "sharrow",
     )
     """
     Setting to log on startup.
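
[Editor's aside — illustrative example, not part of the patch series.] PATCH 34's cache key means a sharrow or numba upgrade lands compiled flows in a fresh directory instead of reusing stale modules. The same computation, runnable on its own (it mirrors the patch line for line):

    from pathlib import Path

    import numba
    import platformdirs
    import sharrow as sh

    sharrow_minor_version = ".".join(sh.__version__.split(".")[:2])
    cache_dir = Path(
        platformdirs.user_cache_dir(appname="ActivitySim")
    ).joinpath(f"sharrow-{sharrow_minor_version}-numba-{numba.__version__}")
    print(cache_dir)  # e.g. .../ActivitySim/sharrow-2.9-numba-0.57.1
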
From fcf729586d4024d03415cfc6f204e8d98da1c0b3 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Thu, 20 Jun 2024 21:12:25 -0500
Subject: [PATCH 36/37] use dask if required

---
 activitysim/core/skim_dataset.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py
index f53d93dd1..704642040 100644
--- a/activitysim/core/skim_dataset.py
+++ b/activitysim/core/skim_dataset.py
@@ -840,6 +840,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
     else:
         land_use_zone_id = None

+    dask_required = False
     if network_los_preload.zone_system == ONE_ZONE:
         # check TAZ alignment for ONE_ZONE system.
         # other systems use MAZ for most lookups, which dynamically
@@ -850,6 +851,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
             except AssertionError as err:
                 logger.info(f"otaz realignment required\n{err}")
                 d = d.reindex(otaz=land_use_zone_id)
+                dask_required = True
             else:
                 logger.info("otaz alignment ok")
             d["otaz"] = land_use.index.to_numpy()
@@ -863,6 +865,7 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
             except AssertionError as err:
                 logger.info(f"dtaz realignment required\n{err}")
                 d = d.reindex(dtaz=land_use_zone_id)
+                dask_required = True
             else:
                 logger.info("dtaz alignment ok")
             d["dtaz"] = land_use.index.to_numpy()
@@ -876,15 +879,21 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
     if d.shm.is_shared_memory:
         for f in omx_file_handles:
             f.close()
         return d
     else:
         logger.info("writing skims to shared memory")
-        # setting `load` to false then calling `reload_from_omx_3d` avoids
-        # using dask to load the data into memory, which is not performant
-        # on Windows for large datasets.
-        d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=False)
-        sh.dataset.reload_from_omx_3d(
-            d_shared_mem,
-            [str(i) for i in omx_file_paths],
-            ignore=state.settings.omx_ignore_patterns,
-        )
+        if dask_required:
+            # setting `load` to True uses dask to load the data into memory
+            d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=True)
+        else:
+            # setting `load` to false then calling `reload_from_omx_3d` avoids
+            # using dask to load the data into memory, which is not performant
+            # on Windows for large datasets, but this only works if the data
+            # requires no realignment (i.e. the land use table and skims match
+            # exactly in order and length).
+            d_shared_mem = d.shm.to_shared_memory(backing, mode="r", load=False)
+            sh.dataset.reload_from_omx_3d(
+                d_shared_mem,
+                [str(i) for i in omx_file_paths],
+                ignore=state.settings.omx_ignore_patterns,
+            )
         for f in omx_file_handles:
             f.close()
         return d_shared_mem
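
[Editor's aside — illustrative example, not part of the patch series.] PATCH 36's dask_required flag reduces to one question: did the skims have to be reindexed to match the land use zone ids? A standalone version of that decision, using the same assert/except pattern as the patch:

    import numpy as np

    def needs_dask(skim_zone_ids, land_use_zone_ids) -> bool:
        try:
            np.testing.assert_array_equal(skim_zone_ids, land_use_zone_ids)
        except AssertionError:
            return True   # realignment required -> dask-backed load=True path
        return False      # aligned -> fast no-dask reload_from_omx_3d path

    assert needs_dask(np.array([1, 2, 3]), np.array([3, 2, 1]))
    assert not needs_dask(np.arange(5), np.arange(5))
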
From c9d42055039da9d797636ea9a5d8223a89a5e5b3 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Thu, 20 Jun 2024 21:15:38 -0500
Subject: [PATCH 37/37] store_skims_in_shm setting

---
 activitysim/core/configuration/top.py | 22 +++++++++++++++++++++-
 activitysim/core/skim_dataset.py      | 10 +++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 450827f56..024f878a4 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 from typing import Any, Literal

-from pydantic import validator
+from pydantic import model_validator, validator

 from activitysim.core.configuration.base import PydanticBase, Union

@@ -476,6 +476,26 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True):
     True will disable the use of zarr.
     """

+    store_skims_in_shm: bool = True
+    """
+    Store skim dataset in shared memory.
+
+    .. versionadded:: 1.3
+
+    By default, if sharrow is enabled (any setting other than false), ActivitySim
+    stores the skim dataset in shared memory. This can be changed by setting this
+    option to False, in which case skims are stored in "typical" process-local
+    memory. Note that storing skims in shared memory is pretty much required for
+    multiprocessing, unless you have a very small model or an absurdly large amount
+    of RAM.
+    """
+
+    @model_validator(mode="after")
+    def _check_store_skims_in_shm(self):
+        if not self.store_skims_in_shm and self.multiprocess:
+            raise ValueError("store_skims_in_shm requires multiprocess to be False")
+        return self
+
     instrument: bool = False
     """
     Use `pyinstrument` to profile component performance.

diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py
index 704642040..1ed871ec0 100644
--- a/activitysim/core/skim_dataset.py
+++ b/activitysim/core/skim_dataset.py
@@ -747,7 +747,10 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
     else:
         remapper = None

-    d = _use_existing_backing_if_valid(backing, omx_file_paths, skim_tag)
+    if state.settings.store_skims_in_shm:
+        d = _use_existing_backing_if_valid(backing, omx_file_paths, skim_tag)
+    else:
+        d = None  # skims are not stored in shared memory, so we need to load them
     do_not_save_zarr = False

     if d is None:
@@ -877,6 +880,11 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset:
         for f in omx_file_handles:
             f.close()
         return d
+    elif not state.settings.store_skims_in_shm:
+        logger.info(
+            "store_skims_in_shm is False, keeping skims in process-local memory"
+        )
+        return d
     else:
         logger.info("writing skims to shared memory")
         if dask_required:
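
[Editor's aside — illustrative example, not part of the patch series.] The model_validator added in PATCH 37 can be exercised on its own; a reduced Settings model shows the incompatible combination being rejected at construction time (pydantic v2 wraps the ValueError in a ValidationError):

    from pydantic import BaseModel, model_validator

    class Settings(BaseModel):
        multiprocess: bool = False
        store_skims_in_shm: bool = True

        @model_validator(mode="after")
        def _check_store_skims_in_shm(self):
            if not self.store_skims_in_shm and self.multiprocess:
                raise ValueError("store_skims_in_shm requires multiprocess to be False")
            return self

    Settings(store_skims_in_shm=False)  # fine: single-process run
    try:
        Settings(multiprocess=True, store_skims_in_shm=False)
    except Exception as err:
        print(type(err).__name__)  # ValidationError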