diff --git a/activitysim/core/configuration/filesystem.py b/activitysim/core/configuration/filesystem.py index 27496b1c7..b55f0c496 100644 --- a/activitysim/core/configuration/filesystem.py +++ b/activitysim/core/configuration/filesystem.py @@ -105,8 +105,8 @@ def data_model_dirs_must_exist(cls, data_model_dir, values): """ Name of the output directory for sharrow cache files. - If not given, a directory named "__sharrowcache__" will be created inside - the general cache directory. + If not given, the sharrow cache is stored in a run-independent persistent + location, according to `platformdirs.user_cache_dir`. See `persist_sharrow_cache`. """ settings_file_name: str = "settings.yaml" @@ -395,7 +395,8 @@ def get_sharrow_cache_dir(self) -> Path: Path """ if self.sharrow_cache_dir is None: - out = self.get_cache_dir("__sharrowcache__") + self.persist_sharrow_cache() + out = self.sharrow_cache_dir else: out = self.get_working_subdir(self.sharrow_cache_dir) if not out.exists(): diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 0afa6c3c4..cabcbeb64 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -359,7 +359,7 @@ def _interaction_sample( ), interaction_utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", verbose=True, ) @@ -370,7 +370,7 @@ def _interaction_sample( interaction_utilities_sh.values, interaction_utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, ) ) _sh_util_miss1 = interaction_utilities_sh.values[ diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index f6f97dd20..38a90655e 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -504,14 +504,14 @@ def to_series(x): sh_util.reshape(utilities.values.shape), utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", verbose=True, ) except AssertionError as err: 
print(err) misses = np.where( - ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0) + ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6) ) _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)] _u_miss1 = utilities.values[tuple(m[0] for m in misses)] diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index 0dcb5a379..3ad4c640a 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -787,13 +787,15 @@ def eval_utilities( sh_util, utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", verbose=True, ) except AssertionError as err: print(err) - misses = np.where(~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0)) + misses = np.where( + ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6) + ) _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)] _u_miss1 = utilities.values[tuple(m[0] for m in misses)] _sh_util_miss1 - _u_miss1 diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico new file mode 100644 index 000000000..99a65353b Binary files /dev/null and b/docs/_static/favicon.ico differ diff --git a/docs/conf.py b/docs/conf.py index f6f56f081..ef358b4cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -175,7 +175,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -# html_favicon = None +html_favicon = "favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, diff --git a/docs/dev-guide/install.md b/docs/dev-guide/install.md index b7b6030a0..8f4b07bd8 100644 --- a/docs/dev-guide/install.md +++ b/docs/dev-guide/install.md @@ -49,18 +49,9 @@ conda activate ./ASIM-ENV git clone https://github.com/ActivitySim/sharrow.git python -m pip install -e ./sharrow git clone https://github.com/ActivitySim/activitysim.git -cd activitysim -git switch develop -cd .. python -m pip install -e ./activitysim ``` -```{note} -If the environment create step above fails due to a 404 missing error, -the main repository may not be up to date with these docs, try this instead: -https://raw.githubusercontent.com/camsys/activitysim/sharrow-black/conda-environments/activitysim-dev-base.yml -``` - Note the above commands will create an environment with all the necessary dependencies, clone both ActivitySim and sharrow from GitHub, and `pip install` each of these libraries in editable mode, which diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md index 04894d769..d3924ed2a 100644 --- a/docs/dev-guide/using-sharrow.md +++ b/docs/dev-guide/using-sharrow.md @@ -17,6 +17,55 @@ multiprocessing mode after all the compilation for all model components is complete. ``` +### Top-Level Activation Options + +Activating sharrow is done at the top level of the model settings file, typically +`settings.yaml`, by setting the `sharrow` configuration setting to `True`: + +```yaml +sharrow: True +``` + +The default operation for sharrow is to attempt to use the sharrow compiler for +all model specifications, and to revert to the legacy pandas-based evaluation +if the sharrow compiler encounters a problem. Alternatively, the `sharrow` +setting can also be set to `require` or `test`. The `require` setting +will cause the model to simply fail if sharrow encounters a problem, which is +useful if the user is interested in ensuring maximum performance. 
+The `test` setting will run the model in a mode where both sharrow and the +legacy pandas-based evaluation are run on each model specification, and the +results are compared to ensure they are substantially identical. This is +useful for debugging and testing, but is not recommended for production runs +as it is much slower than running only one evaluation path or the other. + +Testing is strongly recommended during model development, as it is possible +to write expressions that are valid in one evaluation mode but not the other. +This can happen if model data includes `NaN` values +(see [Performance Considerations](#performance-considerations)), or when +using arithmetic on logical values +(see [Arithmetic on Logical Values](#arithmetic-on-logical-values)). + +### Caching of Precompiled Functions + +The first time you run a model with sharrow enabled, the compiler will run +and create a cache of compiled functions. This can take a long time, especially +for models with many components or complex utility specifications. However, +once the cache is created, subsequent runs of the model will be much faster. +By default, the cached functions are stored in a subdirectory of the +`platformdirs.user_cache_dir` directory, which is located in a platform-specific +location: + +- Windows: `%USERPROFILE%\AppData\Local\ActivitySim\ActivitySim\Cache\...` +- MacOS: `~/Library/Caches/ActivitySim/...` +- Linux: `~/.cache/ActivitySim/...` or `$XDG_CACHE_HOME/ActivitySim/...` + +The cache directory can be changed from this default location by setting the +[`sharrow_cache_dir`](activitysim.core.configuration.FileSystem.sharrow_cache_dir) +setting in the `settings.yaml` file. Note if you change this setting and provide +a relative path, it will be interpreted as relative to the model working directory, +and cached functions may not carry over to other model runs unless copied there +by the user. 
+ ## Model Design Requirements Activating the `sharrow` optimizations also requires using the new @@ -231,6 +280,35 @@ such string operations won't appear in utility specifications at all, or if they do appear, they are executed only once and stored in a temporary value for re-use as needed. +A good approach to reduce string operations in model spec files is to convert +string columns to integer or categorical columns in preprocessors. This can +be done using the `map` method, which can be used to convert strings to integers, +for example: + + `df['fuel_type'].map({'Gas': 1, 'Diesel': 2, 'Hybrid': 3}).fillna(-1).astype(int)` + +Alternatively, data columns can be converted to categorical columns with well-defined +structures. Recent versions of sharrow have made significant improvements in +handling of unordered categorical values, allowing for the use of possibly +more intuitive categorical columns. For example, the fuel type column above +could instead be redefined as a categorical column with the following code: + + `df['fuel_type'].astype(pd.CategoricalDtype(categories=['Gas', 'Diesel', 'Hybrid'], ordered=False))` + +It is important that the categories are defined with the same set of values +in the same order, as any deviation from this will void the compiler cache +and cause the model specification to be recompiled. This means that using +`x.astype('category')` is not recommended, as the categories will be inferred +from the data and may not be consistent across multiple calls to the model +specification evaluator. + +```{note} +Beginning with ActivitySim version 1.3, string-valued +columns created in preprocessors are converted to categorical columns automatically, +which means that ignoring encoding for string-valued outputs is equivalent to +using the `astype('category')` method, and is not recommended. +``` + For models with utility expressions that include a lot of string comparisons, (e.g. 
because they are built for the legacy `pandas.eval` interpreter and have not been updated) sharrow can be disabled by setting @@ -410,7 +488,7 @@ taz_skims: ``` If groups of similarly named variables should have the same encoding applied, -they can be identifed by regular expressions ("regex") instead of explicitly +they can be identified by regular expressions ("regex") instead of explicitly giving each name. For example: ```yaml @@ -485,3 +563,76 @@ taz_skims: For more details on all the settings available for digital encoding, see [DigitalEncoding](activitysim.core.configuration.network.DigitalEncoding). + +## Troubleshooting + +If you encounter errors when running the model with sharrow enabled, it is +important to address them before using the model for analysis. This is +especially important when errors are found running in "test" mode (activated +by `sharrow: test` in the top level settings.yaml). Errors may +indicate that either sharrow or the legacy evaluator is not correctly processing +the mathematical expressions in the utility specifications. + +### "utility not aligned" Error + +One common error that can occur when running the model with sharrow in "test" +mode is the "utility not aligned" error. This error occurs when a sharrow +compiled utility calculation does not sufficiently match the legacy utility +calculation. We say "sufficiently" here because the two calculations may have +slight differences due to numerical precision optimizations applied by sharrow. +These optimizations can result in minor differences in the final utility values, +which are typically inconsequential for model results. However, if the differences +are too large, the "utility not aligned" error will be raised. This error does +not indicate whether the incorrect result is from the sharrow or legacy calculation +(or both), and it is up to the user to determine how to align the calculations +so they are reflective of the model developer's intent. 
+ +To troubleshoot the "utility not aligned" error, the user can use a Python debugger +to compare the utility values calculated by sharrow and the legacy evaluator. +ActivitySim also includes error handler code that will attempt to find the +problematic utility expression and print it to the console or log file, under the +heading "possible problematic expressions". This can be helpful in quickly narrowing +down which lines of a specification file are causing the error. + +Common causes of the "utility not aligned" error include: + +- model data includes `NaN` values but the component settings do not + disable `fastmath` (see [Performance Considerations](#performance-considerations)) +- incorrect use of arithmetic on logical values (see + [Arithmetic on Logical Values](#arithmetic-on-logical-values)) + +### Insufficient system resources + +For large models run on large servers, it is possible to overwhelm the system +with too many processes and threads, which can result in the following error: + +``` +OSError: Insufficient system resources exist to complete the requested service +``` + +This error can be resolved by reducing the number of processes and/or threads per +process. See [Multiprocessing](../users-guide/performance/multiprocessing.md) and +[Multithreading](../users-guide/performance/multithreading.md) in the User's Guide +for more information on how to adjust these settings. + +### Permission Error + +If running a model using multiprocessing with sharrow enabled, it is necessary +to have pre-compiled all the utility specifications to prevent the multiple +processes from competing to write to the same cache location on disk. Failure +to do this can result in a permission error, as some processes may be unable to +write to the cache location. 
+ +``` +PermissionError: The process cannot access the file because it is being used by another process +``` + +To resolve this error, run the model with sharrow enabled in single-process mode +to pre-compile all the utility specifications. If that does not resolve the error, +it is possible that some compiling is being triggered in multiprocess steps that +is not being handled in the single process mode. This is likely due to the presence +of string or categorical columns created in a preprocessor that are not being +stored in a stable data format. To resolve this error, ensure that all expressions +in pre-processors are written in a manner that results in stable data types (e.g. +integers, floats, or categorical columns with a fixed set of categories). See +[Performance Considerations](#performance-considerations) for examples. diff --git a/docs/users-guide/example_models.rst b/docs/users-guide/example_models.rst index be8f580e1..74ad80981 100644 --- a/docs/users-guide/example_models.rst +++ b/docs/users-guide/example_models.rst @@ -2756,7 +2756,7 @@ Skims are named ___