diff --git a/.clang-tidy b/.clang-tidy
index a996e64c0a4..8d269d52fa9 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -10,6 +10,8 @@ Checks: |
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-macro-parentheses,
-bugprone-reserved-identifier,
+ -bugprone-switch-missing-default-case,
+ -bugprone-unchecked-optional-access,
clang-analyzer-alpha.*,
modernize-deprecated-headers,
modernize-make-shared,
@@ -41,7 +43,6 @@ Checks: |
readability-function-size'
WarningsAsErrors: '*,-clang-analyzer-core.StackAddrEscapeBase,-clang-analyzer-optin.mpi.MPI-Checker'
HeaderFilterRegex: '.*'
-AnalyzeTemporaryDtors: false
FormatStyle: none
User: espresso
CheckOptions:
diff --git a/.github/actions/build_and_check/action.yml b/.github/actions/build_and_check/action.yml
index 8b41a8ad119..5e87f0e7ad0 100644
--- a/.github/actions/build_and_check/action.yml
+++ b/.github/actions/build_and_check/action.yml
@@ -6,7 +6,7 @@ runs:
- run: |
brew install boost boost-mpi fftw
brew install hdf5-mpi
- pip3 install -c requirements.txt numpy "cython<3.0" h5py scipy
+ pip3 install -c requirements.txt "cython<3.0" numpy scipy h5py packaging
shell: bash
if: runner.os == 'macOS'
- run: |
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 06de904ae70..7ab35c52b74 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -16,7 +16,7 @@ jobs:
- name: Install pandoc
uses: r-lib/actions/setup-pandoc@v2
- name: Setup SSH agent
- uses: webfactory/ssh-agent@v0.7.0
+ uses: webfactory/ssh-agent@v0.9.0
with:
ssh-private-key: ${{ secrets.GH_PAGES_SSH_PRIVATE_KEY }}
- name: Checkout
diff --git a/.github/workflows/push_pull.yml b/.github/workflows/push_pull.yml
index 1a2eb4e39fd..3cbd6fc2d46 100644
--- a/.github/workflows/push_pull.yml
+++ b/.github/workflows/push_pull.yml
@@ -9,7 +9,7 @@ permissions:
jobs:
macos:
- runs-on: macos-12
+ runs-on: macos-13
if: ${{ github.repository == 'espressomd/espresso' }}
steps:
- name: Checkout
@@ -20,22 +20,22 @@ jobs:
key: macos
save: ${{ github.ref == 'refs/heads/python' }}
- name: Setup Python environment
- uses: actions/setup-python@v4.3.1
+ uses: actions/setup-python@v5.1.0
with:
- python-version: '3.9'
+ python-version: '3.12'
- name: Get runner specifications
run: system_profiler SPHardwareDataType
- name: Build and check
uses: ./.github/actions/build_and_check
env:
- build_procs: 3
- check_procs: 3
+ build_procs: 4
+ check_procs: 4
with_ccache: 'true'
debian:
runs-on: ubuntu-latest
container:
- image: ghcr.io/espressomd/docker/debian:339903979196fd7e72127f2cb5bfb27759d129f9-base-layer
+ image: ghcr.io/espressomd/docker/debian:f7f8ef2c0ca93c67aa16b9f91785492fb04ecc1b-base-layer
credentials:
username: ${{ github.actor }}
password: ${{ secrets.github_token }}
@@ -74,7 +74,7 @@ jobs:
runs-on: ubuntu-latest
if: ${{ github.repository == 'espressomd/espresso' }}
container:
- image: ghcr.io/espressomd/docker/ubuntu-wo-dependencies:339903979196fd7e72127f2cb5bfb27759d129f9-base-layer
+ image: ghcr.io/espressomd/docker/ubuntu-wo-dependencies:f7f8ef2c0ca93c67aa16b9f91785492fb04ecc1b-base-layer
credentials:
username: ${{ github.actor }}
password: ${{ secrets.github_token }}
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e80fe191cf5..ea94df3ffd8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,4 @@
-image: ghcr.io/espressomd/docker/ubuntu-22.04:339903979196fd7e72127f2cb5bfb27759d129f9
+image: ghcr.io/espressomd/docker/ubuntu:f7f8ef2c0ca93c67aa16b9f91785492fb04ecc1b
stages:
- prepare
@@ -18,6 +18,7 @@ stages:
.notification_job_template: &notification_job_definition
<<: *global_job_definition
+ image: ghcr.io/espressomd/docker/fedora:f7f8ef2c0ca93c67aa16b9f91785492fb04ecc1b
variables:
GIT_SUBMODULE_STRATEGY: none
before_script:
@@ -33,7 +34,7 @@ variables:
GIT_SUBMODULE_STRATEGY: recursive
CCACHE_DIR: /cache
CCACHE_MAXSIZE: 100G
- with_ccache: "true"
+ with_ccache: 'true'
status_pending:
<<: *notification_job_definition
@@ -80,9 +81,9 @@ default:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-13'
+ CXX: 'g++-13'
+ GCOV: 'gcov-13'
with_cuda: 'false'
myconfig: 'default'
with_coverage: 'true'
@@ -101,11 +102,10 @@ maxset:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-13'
+ CXX: 'g++-13'
+ GCOV: 'gcov-13'
with_cuda: 'false'
- with_cxx_standard: '20'
myconfig: 'maxset'
with_coverage: 'true'
with_scafacos: 'true'
@@ -113,7 +113,7 @@ maxset:
with_stokesian_dynamics: 'true'
with_caliper: 'true'
check_skip_long: 'true'
- cmake_params: '-D ESPRESSO_TEST_NP=8'
+ cmake_params: '-D CMAKE_CXX_STANDARD=23 -D ESPRESSO_TEST_NP=8'
script:
- bash maintainer/CI/build_cmake.sh
tags:
@@ -126,9 +126,9 @@ no_rotation:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-13'
+ CXX: 'g++-13'
+ GCOV: 'gcov-13'
with_cuda: 'false'
myconfig: 'no_rotation'
with_coverage: 'true'
@@ -141,16 +141,17 @@ no_rotation:
- no-cuda
- numa
-fedora:36:
+fedora:40:
<<: *global_job_definition
stage: build
- image: ghcr.io/espressomd/docker/fedora:339903979196fd7e72127f2cb5bfb27759d129f9
+ image: ghcr.io/espressomd/docker/fedora:f7f8ef2c0ca93c67aa16b9f91785492fb04ecc1b
variables:
with_cuda: 'false'
with_gsl: 'false'
myconfig: 'maxset'
make_check_python: 'true'
with_stokesian_dynamics: 'true'
+ cmake_params: '-D CMAKE_INCLUDE_PATH=/usr/include/mpich-x86_64 -D CMAKE_PREFIX_PATH=/usr/lib64/mpich/lib/'
script:
- bash maintainer/CI/build_cmake.sh
tags:
@@ -163,19 +164,21 @@ clang-sanitizer:
<<: *global_job_definition
stage: build
variables:
- CC: 'clang-14'
- CXX: 'clang++-14'
- CUDAXX: 'clang++-14'
+ CC: 'clang-18'
+ CXX: 'clang++-18'
+ CUDACXX: 'clang++-18'
myconfig: 'maxset'
with_cuda: 'true'
- with_coverage: 'false'
+ with_cuda_compiler: 'clang'
with_static_analysis: 'true'
check_skip_long: 'true'
- with_asan: 'true'
+ with_asan: 'false'
with_ubsan: 'true'
with_scafacos: 'true'
with_walberla: 'true'
with_stokesian_dynamics: 'true'
+ with_coverage: 'false'
+ with_coverage_python: 'false'
script:
- bash maintainer/CI/build_cmake.sh
timeout: 2h
@@ -188,9 +191,9 @@ fast_math:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'false'
@@ -204,13 +207,13 @@ fast_math:
- cuda
when: manual
-cuda11-coverage:
+cuda12-coverage:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'true'
@@ -225,13 +228,13 @@ cuda11-coverage:
- cuda
- numa
-cuda11-maxset:
+cuda12-maxset:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'false'
@@ -254,14 +257,15 @@ cuda11-maxset:
- cuda
- numa
- avx2
+ - reuse-artifacts-same-arch
tutorials-samples-maxset:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'false'
@@ -288,9 +292,9 @@ tutorials-samples-default:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'default'
with_cuda: 'true'
with_coverage: 'false'
@@ -317,9 +321,9 @@ tutorials-samples-empty:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'empty'
with_cuda: 'true'
with_coverage: 'false'
@@ -344,9 +348,9 @@ tutorials-samples-no-gpu:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'false'
@@ -373,9 +377,9 @@ installation:
<<: *global_job_definition
stage: build
variables:
- CC: 'gcc-10'
- CXX: 'g++-10'
- GCOV: 'gcov-10'
+ CC: 'gcc-12'
+ CXX: 'g++-12'
+ GCOV: 'gcov-12'
myconfig: 'maxset'
with_cuda: 'true'
with_coverage: 'false'
@@ -383,6 +387,7 @@ installation:
make_check_python: 'false'
with_scafacos: 'true'
with_walberla: 'true'
+ with_walberla_avx: 'true'
with_stokesian_dynamics: 'true'
srcdir: '${CI_PROJECT_DIR}'
build_type: 'Release'
@@ -391,31 +396,26 @@ installation:
- cd build
- make install
- cmake . -D ESPRESSO_BUILD_TESTS=ON
- # get path of installed files
- - CI_INSTALL_DIR="/tmp/espresso-unit-tests"
- - CI_INSTALL_PYTHON_PATH=$(dirname $(find "${CI_INSTALL_DIR}/lib" -name espressomd))
- - CI_CORES=$(cmake -L . | grep ESPRESSO_CTEST_ARGS | grep --color=never -Po '(?<=-j)[0-9]+')
- # deploy object-in-fluid module
- - cp -r "src/python/object_in_fluid" "${CI_INSTALL_PYTHON_PATH}/object_in_fluid"
- # run all tests with the installed files
- - sed -i "s|$(pwd)/pypresso|${CI_INSTALL_DIR}/bin/pypresso|" testsuite/{python,scripts/samples,scripts/tutorials}/CTestTestfile.cmake
+ - sed -i "s|$(pwd)/pypresso|/tmp/espresso-unit-tests/bin/pypresso|" testsuite/{python,scripts/samples,scripts/tutorials}/CTestTestfile.cmake
- make check_python_skip_long
- make check_samples
- make check_tutorials
tags:
- espresso
- cuda
+ - avx2
when: manual
empty:
<<: *global_job_definition
stage: build
variables:
- CC: 'clang-14'
- CXX: 'clang++-14'
- CUDAXX: 'clang++-14'
+ CC: 'clang-18'
+ CXX: 'clang++-18'
+ CUDACXX: 'clang++-18'
myconfig: 'empty'
with_cuda: 'true'
+ with_cuda_compiler: 'clang'
with_static_analysis: 'true'
with_scafacos: 'false'
with_walberla: 'false'
@@ -433,7 +433,7 @@ check_sphinx:
<<: *global_job_definition
stage: additional_checks
needs:
- - cuda11-maxset
+ - cuda12-maxset
when: on_success
script:
- cd ${CI_PROJECT_DIR}/build
@@ -450,12 +450,13 @@ check_sphinx:
- espresso
- cuda
- numa
+ - reuse-artifacts-same-arch
run_tutorials:
<<: *global_job_definition
stage: additional_checks
needs:
- - cuda11-maxset
+ - cuda12-maxset
when: on_success
script:
- cd ${CI_PROJECT_DIR}/build
@@ -470,10 +471,12 @@ run_tutorials:
paths:
- build/doc/tutorials
expire_in: 1 week
+ timeout: 2h
tags:
- espresso
- cuda
- numa
+ - reuse-artifacts-same-arch
only:
- schedules
@@ -481,7 +484,7 @@ run_doxygen:
<<: *global_job_definition
stage: additional_checks
needs:
- - cuda11-maxset
+ - cuda12-maxset
when: on_success
only:
- python
@@ -496,13 +499,14 @@ run_doxygen:
- espresso
- no-cuda
- numa
+ - reuse-artifacts-same-arch
maxset_no_gpu:
<<: *global_job_definition
stage: additional_checks
when: on_success
needs:
- - cuda11-maxset
+ - cuda12-maxset
script:
- export CUDA_VISIBLE_DEVICES=""
- cd ${CI_PROJECT_DIR}/build
@@ -511,13 +515,14 @@ maxset_no_gpu:
- espresso
- no-cuda
- numa
+ - reuse-artifacts-same-arch
maxset_3_cores:
<<: *global_job_definition
stage: additional_checks
when: on_success
needs:
- - cuda11-maxset
+ - cuda12-maxset
script:
- cd ${CI_PROJECT_DIR}/build
- cmake -D ESPRESSO_TEST_NP=3 .
@@ -526,6 +531,7 @@ maxset_3_cores:
- espresso
- cuda
- numa
+ - reuse-artifacts-same-arch
status_success:
<<: *notification_job_definition
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9212797e37e..43720dfacec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
always_run: false
files: '.*\.(py|pyx|pxd)'
exclude: '\.pylintrc|.*.\.py\.in|^libs/'
- args: ["--ignore=E266,E402,E701,W291,W293", "--in-place", "--aggressive"]
+ args: ["--ignore=E266,E402,E701,W291,W293", "--in-place"]
- id: cmake-format
name: cmake-format
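
[Editor's note] Dropping "--aggressive" restricts the autopep8 hook to whitespace and layout fixes; the aggressive mode can also rewrite code constructs. A minimal illustration in Python (based on autopep8's documented behavior; not part of the hook configuration):

    # Without "--aggressive", autopep8 only fixes layout (PEP 8 E/W codes such
    # as indentation and blank lines). With "--aggressive", it may also apply
    # code rewrites, e.g. changing the comparison below to "if value is None:".
    value = None
    if value == None:  # E711: left untouched by the non-aggressive hook
        print("value is unset")
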
diff --git a/.pylintrc b/.pylintrc
index c6667ebc3aa..27461e3c92b 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,16 +1,21 @@
-[MASTER]
+[MAIN]
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
-extension-pkg-whitelist=
+extension-pkg-allow-list=
-# Add files or directories to the blacklist. They should be base names, not
-# paths.
-ignore=CVS build
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=build
-# Add files or directories matching the regex patterns to the blacklist. The
-# regex matches against base names, not paths.
+# Files or directories matching the regular expression patterns are skipped.
+# The regex matches against base names, not paths. The default value ignores
+# Emacs file locks
ignore-patterns=
# Python code to execute, usually for sys.path manipulation such as
@@ -18,23 +23,25 @@ ignore-patterns=
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
-# number of processors available to use.
-jobs=2
+# number of processors available to use, and will cap the count on Windows to
+# avoid hangs.
+jobs=4
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
-# List of plugins (as comma separated values of python modules names) to load,
+# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
-# Specify a configuration file.
-#rcfile=
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.10
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
@@ -45,218 +52,21 @@ suggestion-mode=yes
unsafe-load-any-extension=no
-[MESSAGES CONTROL]
-
-# Only show warnings with the listed confidence levels. Leave empty to show
-# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
-confidence=
-
-# Disable the message, report, category or checker with the given id(s). You
-# can either give multiple identifiers separated by comma (,) or put this
-# option multiple times (only on the command line, not in the configuration
-# file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
-# you want to run only the similarities checker, you can use "--disable=all
-# --enable=similarities". If you want to run only the classes checker, but have
-# no Warning level messages displayed, use "--disable=all --enable=classes
-# --disable=W".
-disable=all
-
-# Enable the message, report, category or checker with the given id(s). You can
-# either give multiple identifier separated by comma (,) or put this option
-# multiple time (only on the command line, not in the configuration file where
-# it should appear only once). See also the "--disable" option for examples.
-enable=dangerous-default-value, # W0102
- duplicate-key, # W0109
- wildcard-import, # W0401
- assert-on-tuple, # W0199
- unused-import, # W0611
- unused-variable, # W0612
- unused-argument, # W0613
- unused-wildcard-import, # W0614
- deprecated-method, # W1505
- cyclic-import, # R0401
- trailing-comma-tuple, # R1707
- bad-classmethod-argument, # C0202
- undefined-variable, # E0602
-
-
-[REPORTS]
-
-# Python expression which should return a note less than 10 (10 is the highest
-# note). You have access to the variables errors warning, statement which
-# respectively contain the number of errors / warnings messages and the total
-# number of statements analyzed. This is used by the global evaluation report
-# (RP0004).
-evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
-
-# Template used to display messages. This is a python new-style format string
-# used to format the message information. See doc for all details.
-#msg-template=
-
-# Set the output format. Available formats are text, parseable, colorized, json
-# and msvs (visual studio). You can also give a reporter class, e.g.
-# mypackage.mymodule.MyReporterClass.
-output-format=text
-
-# Tells whether to display a full report or only the messages.
-reports=no
-
-# Activate the evaluation score.
-score=no
-
-
-[REFACTORING]
-
-# Maximum number of nested blocks for function / method body
-max-nested-blocks=5
-
-# Complete name of functions that never returns. When checking for
-# inconsistent-return-statements if a never returning function is called then
-# it will be considered as an explicit return statement and no message will be
-# printed.
-never-returning-functions=sys.exit
-
-
-[VARIABLES]
-
-# List of additional names supposed to be defined in builtins. Remember that
-# you should avoid defining new builtins when possible.
-additional-builtins=
-
-# Tells whether unused global variables should be treated as a violation.
-allow-global-unused-variables=yes
-
-# List of strings which can identify a callback function by name. A callback
-# name must start or end with one of those strings.
-callbacks=cb_,
- _cb
-
-# A regular expression matching the name of dummy variables (i.e. expected to
-# not be used).
-dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
-
-# Argument names that match this expression will be ignored. Default to name
-# with leading underscore.
-ignored-argument-names=_.*|^ignored_|^unused_
-
-# Tells whether we should check for unused import in __init__ files.
-init-import=no
-
-# List of qualified module names which can have objects that can redefine
-# builtins.
-redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
-
-
-[TYPECHECK]
-
-# List of decorators that produce context managers, such as
-# contextlib.contextmanager. Add to this list to register other decorators that
-# produce valid context managers.
-contextmanager-decorators=contextlib.contextmanager
-
-# List of members which are set dynamically and missed by pylint inference
-# system, and so shouldn't trigger E1101 when accessed. Python regular
-# expressions are accepted.
-generated-members=
-
-# Tells whether missing members accessed in mixin class should be ignored. A
-# mixin class is detected if its name ends with "mixin" (case insensitive).
-ignore-mixin-members=yes
-
-# Tells whether to warn about missing members when the owner of the attribute
-# is inferred to be None.
-ignore-none=yes
-
-# This flag controls whether pylint should warn about no-member and similar
-# checks whenever an opaque object is returned when inferring. The inference
-# can return multiple potential results while evaluating a Python object, but
-# some branches might not be evaluated, which results in partial inference. In
-# that case, it might be useful to still emit no-member and other checks for
-# the rest of the inferred objects.
-ignore-on-opaque-inference=yes
-
-# List of class names for which member attributes should not be checked (useful
-# for classes with dynamically set attributes). This supports the use of
-# qualified names.
-ignored-classes=optparse.Values,thread._local,_thread._local
-
-# List of module names for which member attributes should not be checked
-# (useful for modules/projects where namespaces are manipulated during runtime
-# and thus existing member attributes cannot be deduced by static analysis. It
-# supports qualified module names, as well as Unix pattern matching.
-ignored-modules=
-
-# Show a hint with possible names when a member name was not found. The aspect
-# of finding the hint is based on edit distance.
-missing-member-hint=yes
-
-# The minimum edit distance a name should have in order to be considered a
-# similar match for a missing member name.
-missing-member-hint-distance=1
-
-# The total number of similar names that should be taken in consideration when
-# showing a hint for a missing member.
-missing-member-max-choices=1
-
-
-[SPELLING]
-
-# Limits count of emitted suggestions for spelling mistakes.
-max-spelling-suggestions=4
-
-# Spelling dictionary name. Available dictionaries: de (aspell), de_AT
-# (aspell), de_CH (aspell), de_DE (aspell), en (aspell), en_AU (aspell), en_CA
-# (aspell), en_GB (aspell), en_US (aspell)..
-spelling-dict=
-
-# List of comma separated words that should not be checked.
-spelling-ignore-words=
-
-# A path to a file that contains private dictionary; one word per line.
-spelling-private-dict-file=
-
-# Tells whether to store unknown words to indicated private dictionary in
-# --spelling-private-dict-file option instead of raising a message.
-spelling-store-unknown-words=no
-
-
-[MISCELLANEOUS]
-
-# List of note tags to take in consideration, separated by a comma.
-notes=FIXME,
- XXX,
- TODO
-
-
-[SIMILARITIES]
-
-# Ignore comments when computing similarities.
-ignore-comments=yes
-
-# Ignore docstrings when computing similarities.
-ignore-docstrings=yes
-
-# Ignore imports when computing similarities.
-ignore-imports=no
-
-# Minimum lines number of a similarity.
-min-similarity-lines=4
-
-
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
-# naming-style.
+# naming-style. If left empty, argument names will be checked with the set
+# naming style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
+# style. If left empty, attribute names will be checked with the set naming
# style.
#attr-rgx=
@@ -272,20 +82,22 @@ bad-names=foo,
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
-# attribute-naming-style.
+# attribute-naming-style. If left empty, class attribute names will be checked
+# with the set naming style.
#class-attribute-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
-# style.
+# style. If left empty, class names will be checked with the set naming style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=UPPER_CASE
# Regular expression matching correct constant names. Overrides const-naming-
+# style. If left empty, constant names will be checked with the set naming
# style.
#const-rgx=
@@ -297,7 +109,8 @@ docstring-min-length=-1
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
-# naming-style.
+# naming-style. If left empty, function names will be checked with the set
+# naming style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
@@ -315,21 +128,22 @@ include-naming-hint=no
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
-# inlinevar-naming-style.
+# inlinevar-naming-style. If left empty, inline iteration names will be checked
+# with the set naming style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
-# style.
+# style. If left empty, method names will be checked with the set naming style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
-# style.
+# style. If left empty, module names will be checked with the set naming style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
@@ -349,10 +163,75 @@ property-classes=abc.abstractproperty
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
-# naming-style.
+# naming-style. If left empty, variable names will be checked with the set
+# naming style.
#variable-rgx=
+[CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+ __new__,
+ setUp
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+ _fields,
+ _replace,
+ _source,
+ _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=cls
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branch for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
@@ -374,13 +253,6 @@ max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
-# List of optional constructs for which whitespace checking is disabled. `dict-
-# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
-# `trailing-comma` allows a space between comma and closing bracket: (a, ).
-# `empty-line` allows space-only lines.
-no-space-check=trailing-comma,
- dict-separator
-
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
@@ -390,48 +262,25 @@ single-line-class-stmt=no
single-line-if-stmt=no
-[LOGGING]
-
-# Format style used to check logging format string. `old` means using %
-# formatting, while `new` is for `{}` formatting.
-logging-format-style=old
-
-# Logging modules to check that the string format arguments are in logging
-# function parameter format.
-logging-modules=logging
-
-
-[STRING]
-
-# This flag controls whether the implicit-str-concat-in-sequence should
-# generate a warning on implicit string concatenation in sequences defined over
-# several lines.
-check-str-concat-over-line-jumps=no
-
-
[IMPORTS]
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
-# Analyse import fallback blocks. This can be used to support both Python 2 and
-# 3 compatible code, which means that the block might have code that exists
-# only in one or another interpreter, leading to false positives when analysed.
-analyse-fallback-blocks=no
-
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
-# Create a graph of external dependencies in the given file (report RP0402 must
-# not be disabled).
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
ext-import-graph=
-# Create a graph of every (i.e. internal and external) dependencies in the
-# given file (report RP0402 must not be disabled).
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
import-graph=
-# Create a graph of internal dependencies in the given file (report RP0402 must
-# not be disabled).
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
@@ -442,64 +291,236 @@ known-standard-library=
known-third-party=enchant
-[CLASSES]
+[LOGGING]
-# List of method names used to declare (i.e. assign) instance attributes.
-defining-attr-methods=__init__,
- __new__,
- setUp
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=new
-# List of member names, which should be excluded from the protected access
-# warning.
-exclude-protected=_asdict,
- _fields,
- _replace,
- _source,
- _make
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
-# List of valid names for the first argument in a class method.
-valid-classmethod-first-arg=cls
-# List of valid names for the first argument in a metaclass class method.
-valid-metaclass-classmethod-first-arg=cls
+[MESSAGES CONTROL]
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
+# UNDEFINED.
+confidence=
-[DESIGN]
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=all
-# Maximum number of arguments for function / method.
-max-args=5
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable=dangerous-default-value, # W0102
+ duplicate-key, # W0109
+ wildcard-import, # W0401
+ assert-on-tuple, # W0199
+ unused-import, # W0611
+ unused-variable, # W0612
+ unused-argument, # W0613
+ unused-wildcard-import, # W0614
+ deprecated-method, # W1505
+ cyclic-import, # R0401
+ trailing-comma-tuple, # R1707
+ bad-classmethod-argument, # C0202
+ undefined-variable, # E0602
-# Maximum number of attributes for a class (see R0902).
-max-attributes=7
-# Maximum number of boolean expressions in an if statement.
-max-bool-expr=5
+[METHOD_ARGS]
-# Maximum number of branch for function / method body.
-max-branches=12
+# List of qualified names (i.e., library.method) which require a timeout
+# parameter e.g. 'requests.api.get,requests.api.post'
+timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
-# Maximum number of locals for function / method body.
-max-locals=15
-# Maximum number of parents for a class (see R0901).
-max-parents=7
+[MISCELLANEOUS]
-# Maximum number of public methods for a class (see R0904).
-max-public-methods=20
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,
+ XXX,
+ TODO
-# Maximum number of return / yield for function / method body.
-max-returns=6
-# Maximum number of statements in function / method body.
-max-statements=50
+[REFACTORING]
-# Minimum number of public methods for a class (see R0903).
-min-public-methods=2
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit
-[EXCEPTIONS]
-# Exceptions that will emit a warning when being caught. Defaults to
-# "BaseException, Exception".
-overgeneral-exceptions=BaseException,
- Exception
+[REPORTS]
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'fatal', 'error', 'warning', 'refactor',
+# 'convention', and 'info' which contain the number of messages in each
+# category, as well as 'statement' which is the total number of statements
+# analyzed. This score is used by the global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+msg-template=
+
+# Set the output format. Available formats are: text, parseable, colorized,
+# json2 (improved json format), json (old json format) and msvs (visual
+# studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Tells whether to display a full report or only the messages.
+reports=no
+
+# Activate the evaluation score.
+score=no
+
+
+[SIMILARITIES]
+
+# Comments are removed from the similarity computation
+ignore-comments=yes
+
+# Docstrings are removed from the similarity computation
+ignore-docstrings=yes
+
+# Imports are removed from the similarity computation
+ignore-imports=no
+
+# Signatures are removed from the similarity computation
+ignore-signatures=yes
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+
+[SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. No available dictionaries : You need to install
+# both the python package and the system dependency for enchant to work.
+spelling-dict=
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# Tells whether to warn about missing members when the owner of the attribute
+# is inferred to be None.
+ignore-none=yes
+
+# This flag controls whether pylint should warn about no-member and similar
+# checks whenever an opaque object is returned when inferring. The inference
+# can return multiple potential results while evaluating a Python object, but
+# some branches might not be evaluated, which results in partial inference. In
+# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis. It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken in consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+
+[VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables=yes
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+ _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
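
[Editor's note] For reference, `dangerous-default-value` (W0102) from the `enable=` list above flags the classic mutable-default-argument pitfall. A minimal sketch of the pattern and its conventional fix:

    # Flagged by W0102: the default list is created once at function
    # definition time and shared across all calls.
    def append_item(item, bucket=[]):
        bucket.append(item)
        return bucket

    # Conventional fix: use None as a sentinel and allocate per call.
    def append_item_fixed(item, bucket=None):
        if bucket is None:
            bucket = []
        bucket.append(item)
        return bucket

    print(append_item(1))  # [1]
    print(append_item(2))  # [1, 2] -- state leaked from the previous call
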
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c40cb72a7c..fd98f91fac4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-cmake_minimum_required(VERSION 3.20)
+cmake_minimum_required(VERSION 3.25.1)
message(STATUS "CMake version: ${CMAKE_VERSION}")
if(POLICY CMP0076)
# make target_sources() convert relative paths to absolute
@@ -34,7 +34,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# C++ standard
enable_language(CXX)
-set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to be used")
+set(CMAKE_CXX_STANDARD 20 CACHE STRING "C++ standard to be used")
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
@@ -47,11 +47,11 @@ macro(espresso_minimal_compiler_version)
endif()
endmacro()
-espresso_minimal_compiler_version("GNU" 8.0.0)
-espresso_minimal_compiler_version("Clang" 9.0.0)
-espresso_minimal_compiler_version("AppleClang" 11.0.0)
-espresso_minimal_compiler_version("Intel" 18.0)
-espresso_minimal_compiler_version("IntelLLVM" 2021.0)
+espresso_minimal_compiler_version("GNU" 10.5.0)
+espresso_minimal_compiler_version("Clang" 14.0.0)
+espresso_minimal_compiler_version("AppleClang" 14.0.0)
+espresso_minimal_compiler_version("Intel" 2021.9)
+espresso_minimal_compiler_version("IntelLLVM" 2023.1)
include(FeatureSummary)
project(ESPResSo)
@@ -190,46 +190,30 @@ add_library(espresso::avx_flags ALIAS espresso_avx_flags)
# CUDA compiler
if(ESPRESSO_BUILD_WITH_CUDA)
- set(ESPRESSO_DEFINE_CUDA_ARCHITECTURES OFF)
- if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
- set(ESPRESSO_DEFINE_CUDA_ARCHITECTURES ON)
- endif()
+ cmake_minimum_required(VERSION 3.25.2)
include(CheckLanguage)
enable_language(CUDA)
check_language(CUDA)
- set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+ espresso_minimal_compiler_version("GNU" 11.4.0)
+ espresso_minimal_compiler_version("Clang" 17.0.0)
+ set(CMAKE_CUDA_STANDARD 20)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
- set(ESPRESSO_MINIMAL_CUDA_VERSION 11.0)
+ set(ESPRESSO_MINIMAL_CUDA_VERSION 12.0)
find_package(CUDAToolkit ${ESPRESSO_MINIMAL_CUDA_VERSION} REQUIRED)
- if(ESPRESSO_DEFINE_CUDA_ARCHITECTURES)
- unset(ESPRESSO_CUDA_ARCHITECTURES)
- # 1. sm_75: RTX-2000 series (Turing)
- # 2. sm_61: GTX-1000 series (Pascal)
- # 3. sm_52: GTX-900 series (Maxwell)
- if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
- list(APPEND ESPRESSO_CUDA_ARCHITECTURES 75)
- list(APPEND ESPRESSO_CUDA_ARCHITECTURES 61)
- elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
- # GTX-900 series (Maxwell)
- if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
- list(APPEND ESPRESSO_CUDA_ARCHITECTURES 52)
- endif()
- if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
- list(APPEND ESPRESSO_CUDA_ARCHITECTURES 61)
- # With Clang 14+, architectures sm_70+ are only supported with Thrust
- # 1.11+ from CUDA 11.3+, for details see
- # https://github.com/NVIDIA/cub/pull/170
- if((CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 14)
- OR (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.3.0))
- list(APPEND ESPRESSO_CUDA_ARCHITECTURES 75)
- endif()
- endif()
- endif()
- # only override CMAKE_CUDA_ARCHITECTURES when dependencies are satisfied
- if(DEFINED ESPRESSO_CUDA_ARCHITECTURES)
- set(CMAKE_CUDA_ARCHITECTURES ${ESPRESSO_CUDA_ARCHITECTURES})
+ if(NOT DEFINED ESPRESSO_CMAKE_CUDA_ARCHITECTURES)
+ if("$ENV{CUDAARCHS}" STREQUAL "")
+ # 1. sm_61: GTX-1000 series (Pascal)
+ # 2. sm_75: RTX-2000 series (Turing)
+ # 3. sm_86: RTX-3000 series (Ampere)
+ # 4. sm_89: RTX-4000 series (Ada)
+ set(ESPRESSO_CUDA_ARCHITECTURES "61;75")
+ else()
+ set(ESPRESSO_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")
endif()
+ set(ESPRESSO_CMAKE_CUDA_ARCHITECTURES "${ESPRESSO_CUDA_ARCHITECTURES}"
+ CACHE INTERNAL "")
endif()
+ set(CMAKE_CUDA_ARCHITECTURES "${ESPRESSO_CMAKE_CUDA_ARCHITECTURES}")
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
find_package(CUDACompilerNVCC ${ESPRESSO_MINIMAL_CUDA_VERSION} REQUIRED)
elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
@@ -238,7 +222,7 @@ if(ESPRESSO_BUILD_WITH_CUDA)
FATAL_ERROR
"Cannot enable code coverage with Clang as the CUDA compiler")
endif()
- find_package(CUDACompilerClang 9.0 REQUIRED)
+ find_package(CUDACompilerClang 17.0 REQUIRED)
else()
message(FATAL_ERROR "Unknown CUDA compiler '${CMAKE_CUDA_COMPILER_ID}'")
endif()
@@ -246,8 +230,8 @@ endif()
# Python interpreter and Cython interface library
if(ESPRESSO_BUILD_WITH_PYTHON)
- find_package(Python 3.9 REQUIRED COMPONENTS Interpreter Development NumPy)
- find_package(Cython 0.29.21...<3.0.8 REQUIRED)
+ find_package(Python 3.10 REQUIRED COMPONENTS Interpreter Development NumPy)
+ find_package(Cython 0.29.28...<3.0.10 REQUIRED)
find_program(IPYTHON_EXECUTABLE NAMES jupyter ipython3 ipython)
endif()
@@ -457,9 +441,8 @@ if(ESPRESSO_BUILD_WITH_COVERAGE)
espresso_coverage_flags INTERFACE -g -fprofile-instr-generate
-fcoverage-mapping)
else()
- target_compile_options(
- espresso_coverage_flags INTERFACE -g --coverage -fprofile-arcs
- -ftest-coverage)
+ target_compile_options(espresso_coverage_flags INTERFACE -g --coverage
+ -fprofile-abs-path)
target_link_libraries(espresso_coverage_flags INTERFACE gcov)
endif()
endif()
@@ -472,7 +455,6 @@ target_compile_options(
-Wall
-Wextra
-pedantic
- $<$:-Werror>
# add extra warnings
$<$:-Wextern-initializer>
$<$:-Wrange-loop-analysis>
@@ -482,16 +464,35 @@ target_compile_options(
$<$:-Wmissing-variable-declarations>
$<$,$,11.0.0>>:-Wnon-c-typedef-for-linkage>
$<$>:-Wdelete-non-virtual-dtor>
- # disable warnings from -Wextra
+ # disable warnings from -Wall and -Wextra
-Wno-sign-compare
-Wno-unused-function
-Wno-unused-parameter
+ -Wno-array-bounds
+ $<$:-Wno-restrict>
$<$:-Wno-clobbered>
$<$:-diag-disable=592>
$<$:-Wno-gnu-zero-variadic-macro-arguments>
$<$,$,8.1.0>>:-Wno-cast-function-type>
$<$>:-Wno-implicit-fallthrough>
- $<$>:-Wno-unused-private-field>)
+ $<$>:-Wno-unused-private-field>
+ # warnings are errors
+ $<$:-Werror>)
+
+if(ESPRESSO_BUILD_WITH_CUDA)
+ target_compile_options(
+ espresso_cuda_flags
+ INTERFACE
+ -Wall
+ -Wextra
+ -Wno-sign-compare
+ -Wno-unused-parameter
+ $<$>:-Wno-implicit-fallthrough>
+ # warnings are errors
+ $<$,$>:--Werror=all-warnings>
+ $<$,$>:-Werror>
+ )
+endif()
# disable warning from -Wextra on ARM processors
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_PROCESSOR MATCHES
@@ -526,16 +527,25 @@ if(ESPRESSO_BUILD_WITH_ASAN)
target_compile_options(espresso_cpp_flags INTERFACE -fsanitize=address
-fno-omit-frame-pointer)
target_link_libraries(espresso_cpp_flags INTERFACE -fsanitize=address)
+ if(ESPRESSO_BUILD_WITH_CUDA)
+ target_link_libraries(espresso_cuda_flags INTERFACE -fsanitize=address)
+ endif()
endif()
if(ESPRESSO_BUILD_WITH_MSAN)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g -O1")
target_compile_options(espresso_cpp_flags INTERFACE -fsanitize=memory
-fno-omit-frame-pointer)
target_link_libraries(espresso_cpp_flags INTERFACE -fsanitize=memory)
+ if(ESPRESSO_BUILD_WITH_CUDA)
+ target_link_libraries(espresso_cuda_flags INTERFACE -fsanitize=memory)
+ endif()
endif()
if(ESPRESSO_BUILD_WITH_UBSAN)
target_compile_options(espresso_cpp_flags INTERFACE -fsanitize=undefined)
target_link_libraries(espresso_cpp_flags INTERFACE -fsanitize=undefined)
+ if(ESPRESSO_BUILD_WITH_CUDA)
+ target_link_libraries(espresso_cuda_flags INTERFACE -fsanitize=undefined)
+ endif()
endif()
target_link_libraries(espresso_cpp_flags INTERFACE espresso::coverage_flags)
@@ -595,7 +605,7 @@ if(ESPRESSO_BUILD_WITH_WALBERLA)
FetchContent_Declare(
walberla
GIT_REPOSITORY https://i10git.cs.fau.de/walberla/walberla.git
- GIT_TAG 065ce5f311850371a97ac4766f47dbb5ca8424ba
+ GIT_TAG b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
)
# workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21146
if(NOT DEFINED walberla_SOURCE_DIR OR NOT EXISTS "${walberla_SOURCE_DIR}")
@@ -614,10 +624,9 @@ if(ESPRESSO_BUILD_WITH_WALBERLA)
set(CMAKE_POSITION_INDEPENDENT_CODE on CACHE BOOL "")
if(ESPRESSO_BUILD_WITH_CUDA)
set(WALBERLA_BUILD_WITH_CUDA "on" CACHE BOOL "")
- if(CMAKE_VERSION VERSION_LESS 3.25 OR NOT ESPRESSO_CUDA_COMPILER STREQUAL
- "clang")
+ if(NOT ESPRESSO_CUDA_COMPILER STREQUAL "clang")
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
- set(CMAKE_CUDA_ARCHITECTURES 75)
+ message(FATAL_ERROR "variable CMAKE_CUDA_ARCHITECTURES is undefined")
endif()
endif()
endif()
@@ -637,7 +646,7 @@ if(ESPRESSO_BUILD_WITH_WALBERLA)
set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::fft)
endif()
if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
- set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::cuda)
+ set(WALBERLA_LIBS ${WALBERLA_LIBS} walberla::gpu)
endif()
# workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21283
foreach(target_w_namespace IN LISTS WALBERLA_LIBS)
@@ -660,7 +669,7 @@ if(ESPRESSO_BUILD_WITH_CALIPER)
FetchContent_Declare(
caliper
GIT_REPOSITORY https://github.com/LLNL/Caliper.git
- GIT_TAG v2.9.1
+ GIT_TAG v2.10.0
)
if(NOT DEFINED caliper_SOURCE_DIR OR NOT EXISTS "${caliper_SOURCE_DIR}")
FetchContent_Populate(caliper)
@@ -668,17 +677,18 @@ if(ESPRESSO_BUILD_WITH_CALIPER)
# cmake-format: on
set(CALIPER_OPTION_PREFIX on CACHE BOOL "")
set(CALIPER_WITH_MPI on CACHE BOOL "")
- if(ESPRESSO_BUILD_WITH_CUDA)
- set(CALIPER_WITH_NVTX on CACHE BOOL "")
- set(CALIPER_WITH_CUPTI on CACHE BOOL "")
- endif()
+ set(CALIPER_WITH_NVTX off CACHE BOOL "")
+ set(CALIPER_WITH_CUPTI off CACHE BOOL "")
set(CALIPER_BUILD_SHARED_LIBS on CACHE BOOL "")
add_subdirectory("${caliper_SOURCE_DIR}")
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL
- "GNU")
- target_compile_options(caliper-services
- PRIVATE -Wno-deprecated-declarations)
- endif()
+ target_compile_options(
+ caliper-services
+ PRIVATE
+ $<$:-Wno-deprecated-declarations>)
+ target_compile_options(
+ caliper-runtime
+ PRIVATE $<$:-Wno-maybe-uninitialized>
+ $<$:-Wno-volatile>)
endif()
#
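
[Editor's note] The version gates above can be mirrored from Python for a quick sanity check. A sketch under the assumption that the `packaging` module is available (it was added to the macOS CI dependencies earlier in this patch); the authoritative checks remain CMake's `find_package()` calls:

    # Mirrors the bumped toolchain requirements in CMakeLists.txt:
    # Python >= 3.10 and Cython in [0.29.28, 3.0.10).
    import sys

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    import Cython

    assert sys.version_info >= (3, 10), "this branch requires Python >= 3.10"
    assert Version(Cython.__version__) in SpecifierSet(">=0.29.28,<3.0.10"), \
        "unsupported Cython version"
    print("toolchain versions OK")
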
diff --git a/NEWS b/NEWS
index 2632dadb3d8..a0867e89755 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,129 @@
= ESPRESSO NEWS =
=================
+ESPResSo 4.2.2
+==============
+
+This release provides a number of corrections for the ESPResSo 4.2 line.
+We recommend that this release be used for all production simulations.
+The interface has not been changed between ESPResSo 4.2.1 and 4.2.2.
+However, some bugs were discovered which can affect simulation results.
+Please find the list of changes below. The numbers in brackets refer to
+ticket numbers on https://github.com/espressomd/espresso
+
+Improved documentation
+----------------------
+
+* Installation instructions now mention the FFTW3 MPI dependency
+ of long-range solvers and provide recommended version numbers
+ for Jupyter Notebook dependencies (#4790).
+
+* Installation instructions now mention Python environments (#4922).
+
+* Observables now properly document return values, array shapes,
+ and use a more consistent mathematical notation (#4898).
+
+Bug fixes
+---------
+
+* Fatal runtime errors caused by the lifetime of MPI global variables
+ were addressed (#4858). Older ESPResSo releases built with
+ Boost 1.84 or later might randomly crash when exiting
+ the Python interpreter.
+
+* Virtual sites no longer contribute to the kinetic energy
+ of the system (#4839). The regression was introduced
+ in April 2021 and affected the 4.2 branch of ESPResSo.
+
+* Inertialess tracers are now integrated along the z-axis (#4714).
+ The regression was introduced in February 2022 and affected
+ the 4.2 branch of ESPResSo.
+
+* Inertialess tracers now throw an exception when attempting to use
+ LB GPU with 2 or more MPI ranks (#4714). Before, tracers on non-root
+ MPI ranks would be silently ignored by the CUDA kernels,
+ and would have a constant velocity, either 0 if the particle never
+ visited the fluid domain on the root rank, or the last known velocity
+ if the particle was once on the root rank. This bug affected all
+ ESPResSo versions.
+
+* Particles close to the faces of the simulation box are now properly
+ coupled to the LB fluid (#4827). Due to numerical instability, it was
+ previously possible for particles to be outside the simulation box by
+ a tiny amount and skip LB particle coupling. The probability of this
+ bug occurring was low, but increased in simulations that
+ purposefully placed particles near the faces of the simulation box:
+ polymers sheared by Lees-Edwards boundary conditions, raspberry
+ particles (colloids, bacteria, etc.) when crossing a periodic
+ boundary, or cell membranes placed close to a periodic boundary.
+
+* Resizing the box now throws a runtime error if there are constraints
+ present (#4778), since constraint preconditions might no longer be
+ fulfilled. For example, a wall constraint might end up outside the
+ box boundaries when the box shrinks.
+
+* Resizing the box via `system.box_l = new_box_l` now throws
+ a runtime error if there are particles present, because particle
+ position folding cannot be guaranteed to be correct (#4901);
+ use `system.change_volume_and_rescale_particles()` instead,
+ which properly rescales particle positions (see the sketch after this list).
+
+* The velocity Verlet NpT propagator does not apply friction and noise
+ to angular velocities. ESPResSo now throws an error when NpT
+ encounters a rotating particle (#4843). This bug affected all
+ ESPResSo versions.
+
+* The Brownian thermostat can no longer be configured with
+ `act_on_virtual=True` due to an unresolved bug (#4295)
+ that will be addressed in the next minor release.
+
+* Restrictions on the number of MPI ranks have been lifted from the
+ checkpointing mechanism (#4724). It is now possible to use
+ checkpointing again in MPI-parallel simulations when the system
+ contains LB boundaries or `Union` shape-based constraints.
+ These restrictions had been introduced in 4.2.0 for technical
+ reasons that have since been resolved.
+
+* When passing an invalid value to a function that expects an input
+ parameter of type `list` of size 3, an exception is now raised (#4911).
+ Previously, some functions would print an error message and continue
+ their execution with uninitialized data (see the sketch after this list).
+
+* The per-`type` and per-`mol_id` contributions from
+ `system.analysis.energy()`, `system.analysis.pressure()`
+ and `system.analysis.pressure_tensor()` now return the correct
+ values (#4788). Older versions of ESPResSo confused the
+ particle `mol_id` with the particle `type`. The total pressure
+ was unreliable when `mol_id` properties were set to non-zero values.
+
+* The OpenGL visualizer now extracts the correct non-bonded potential
+ parameter `sigma` when feature `WCA` is compiled in but `LENNARD_JONES`
+ isn't (#4720). The regression was introduced in 4.2.1.
+
+* Method `OifCell.elastic_forces()` no longer throws a `TypeError` (#4813).
+
+* Benchmark scripts were adjusted to support large particle numbers (#4753).
+
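
[Editor's note] Two minimal sketches of the behavior changes above, written against the espressomd 4.2 Python API (method names are taken from the release notes; argument values are illustrative).

Resizing a particle-filled box: direct assignment to `system.box_l` now raises, and `change_volume_and_rescale_particles()` is the supported path:

    import espressomd

    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
    system.part.add(pos=[1.0, 2.0, 3.0])
    # system.box_l = [20.0, 20.0, 20.0]  # raises: particles are present
    system.change_volume_and_rescale_particles(20.0, "xyz")  # rescales positions

Stricter input validation (#4911): a parameter documented as a list of size 3 now rejects malformed input with an exception instead of continuing with uninitialized data:

    import espressomd

    system = espressomd.System(box_l=[10.0, 10.0, 10.0])
    try:
        system.part.add(pos=[1.0, 2.0])  # not a list of size 3
    except Exception as err:
        print(f"rejected: {err}")
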
+Under the hood changes
+----------------------
+
+* Several Clang 16 and GCC 13 compiler diagnostics have been addressed
+ (#4715).
+
+* A non-critical GCC C++20 deprecation warning in Cython-generated code
+ was disabled (#4725).
+
+* Several deprecation warnings emitted by CMake 3.27 have been silenced
+ (#4792).
+
+* Add support for setuptools version 67.3.0 and above (#4709).
+
+* Add support for Python 3.12 in testsuites run by CTest (#4852).
+
+* Python requirements have been updated (#4924).
+
+* CI pipeline URLs have been fixed (#4736).
+
ESPResSo 4.2.1
==============
diff --git a/Readme.md b/Readme.md
index 95048e8c80b..098817a275c 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,3 +1,9 @@
+# Invitation to the ESPResSo Summer School 2024
+
+[![CECAM Flagship School registration link](https://img.shields.io/badge/CECAM%20Flagship%20School-Register%20Now-blue?style=for-the-badge)](https://www.cecam.org/workshop-details/1324)
+
+The summer school "Simulating soft matter across scales" will take place on October 7-11, 2024, in Stuttgart. Registration is now open on [CECAM](https://www.cecam.org/workshop-details/1324).
+
# ESPResSo
[![GitLab CI](https://gitlab.icp.uni-stuttgart.de/espressomd/espresso/badges/python/pipeline.svg)](https://gitlab.icp.uni-stuttgart.de/espressomd/espresso/-/commits/python)
@@ -70,7 +76,7 @@ For most users, we recommend downloading the latest release version of ESPResSo.
can find it in the [release page](https://github.com/espressomd/espresso/releases),
together with past releases until 4.0. When choosing a release, we recommend that
you get the latest bugfix release in that line. For example, for 4.2 you would like
-to use 4.2.1.
+to use 4.2.2.
### Join the community
diff --git a/cmake/FindCUDACompilerClang.cmake b/cmake/FindCUDACompilerClang.cmake
index 113eefde460..56f02f1023a 100644
--- a/cmake/FindCUDACompilerClang.cmake
+++ b/cmake/FindCUDACompilerClang.cmake
@@ -95,24 +95,34 @@ target_compile_options(
$<$:-O3 -DNDEBUG>
$<$:-O2 -DNDEBUG>
$<$:-O2 -g -DNDEBUG>
- $<$:-O3 -g>
+ $<$:-O3 -g -fprofile-instr-generate -fcoverage-mapping>
$<$:-O3 -g>
)
-function(espresso_add_gpu_library)
- set(options STATIC SHARED MODULE EXCLUDE_FROM_ALL)
- set(oneValueArgs)
- set(multiValueArgs)
- cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
- list(GET ARG_UNPARSED_ARGUMENTS 0 TARGET_NAME)
- list(REMOVE_AT ARG_UNPARSED_ARGUMENTS 0)
- set(TARGET_SOURCES ${ARG_UNPARSED_ARGUMENTS})
+function(espresso_setup_gpu_app)
+ cmake_parse_arguments(TARGET "" "NAME" "SOURCES" ${ARGN})
set_source_files_properties(${TARGET_SOURCES} PROPERTIES LANGUAGE "CUDA")
- add_library(${ARGV})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE "CXX")
target_link_libraries(${TARGET_NAME} PRIVATE espresso::cuda_flags)
endfunction()
+function(espresso_add_gpu_library)
+ add_library(${ARGV})
+ cmake_parse_arguments(ARG "STATIC;SHARED;MODULE;EXCLUDE_FROM_ALL" "" "" ${ARGN})
+ list(GET ARGV 0 TARGET_NAME)
+ set(TARGET_SOURCES ${ARG_UNPARSED_ARGUMENTS})
+ list(POP_FRONT TARGET_SOURCES)
+ espresso_setup_gpu_app(NAME ${TARGET_NAME} SOURCES ${TARGET_SOURCES})
+endfunction()
+
+function(espresso_add_gpu_executable)
+ add_executable(${ARGV})
+ list(GET ARGV 0 TARGET_NAME)
+ set(TARGET_SOURCES ${ARGV})
+ list(POP_FRONT TARGET_SOURCES)
+ espresso_setup_gpu_app(NAME ${TARGET_NAME} SOURCES ${TARGET_SOURCES})
+endfunction()
+
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
CUDACompilerClang REQUIRED_VARS CMAKE_CUDA_COMPILER VERSION_VAR
diff --git a/cmake/FindCUDACompilerNVCC.cmake b/cmake/FindCUDACompilerNVCC.cmake
index 0ddce340a8c..d9600ecf6d7 100644
--- a/cmake/FindCUDACompilerNVCC.cmake
+++ b/cmake/FindCUDACompilerNVCC.cmake
@@ -23,12 +23,12 @@
# include the toolkit libraries and declare a custom
# `add_library()` wrapper function named `espresso_add_gpu_library()`.
-get_filename_component(ESPRESO_CUDAToolkit_ROOT_RESOLVED "${CUDAToolkit_ROOT}/bin/nvcc" REALPATH)
-get_filename_component(ESPRESO_CMAKE_CUDA_COMPILER_RESOLVED "${CMAKE_CUDA_COMPILER}" REALPATH)
+file(REAL_PATH "${CUDAToolkit_ROOT}/bin/nvcc" ESPRESO_CUDAToolkit_ROOT_RESOLVED)
+file(REAL_PATH "${CMAKE_CUDA_COMPILER}" ESPRESO_CMAKE_CUDA_COMPILER_RESOLVED)
if(NOT "${ESPRESO_CUDAToolkit_ROOT_RESOLVED}" STREQUAL "${ESPRESO_CMAKE_CUDA_COMPILER_RESOLVED}"
AND NOT ESPRESSO_INSIDE_DOCKER)
- get_filename_component(ESPRESSO_NVCC_EXECUTABLE_DIRNAME "${CMAKE_CUDA_COMPILER}" DIRECTORY)
- get_filename_component(ESPRESSO_NVCC_EXECUTABLE_DIRNAME "${ESPRESSO_NVCC_EXECUTABLE_DIRNAME}" DIRECTORY)
+ cmake_path(GET CMAKE_CUDA_COMPILER PARENT_PATH ESPRESSO_NVCC_EXECUTABLE_DIRNAME)
+ cmake_path(GET ESPRESSO_NVCC_EXECUTABLE_DIRNAME PARENT_PATH ESPRESSO_NVCC_EXECUTABLE_DIRNAME)
message(
WARNING
"Your nvcc compiler (${CMAKE_CUDA_COMPILER}) does not appear to match your CUDA toolkit installation (${CUDAToolkit_ROOT}). While ESPResSo will still compile, you might get unexpected crashes. Try hinting it with '-D CUDAToolkit_ROOT=\"${ESPRESSO_NVCC_EXECUTABLE_DIRNAME}\"'."
@@ -47,17 +47,25 @@ target_compile_options(
$<$<CONFIG:Release>:-Xptxas=-O3 -Xcompiler=-O3 -DNDEBUG>
$<$<CONFIG:MinSizeRel>:-Xptxas=-O2 -Xcompiler=-Os -DNDEBUG>
$<$<CONFIG:RelWithDebInfo>:-Xptxas=-O2 -Xcompiler=-O2,-g -DNDEBUG>
- $<$<CONFIG:Coverage>:-Xptxas=-O3 -Xcompiler=-Og,-g>
+ $<$<CONFIG:Coverage>:-Xptxas=-O3 -Xcompiler=-Og,-g,--coverage,-fprofile-abs-path>
$<$<CONFIG:RelWithAssert>:-Xptxas=-O3 -Xcompiler=-O3,-g>
- $<$<BOOL:${ESPRESSO_WARNINGS_ARE_ERRORS}>:-Xcompiler=-Werror;-Xptxas=-Werror>
$<$<BOOL:${CMAKE_OSX_SYSROOT}>:-Xcompiler=-isysroot;-Xcompiler=${CMAKE_OSX_SYSROOT}>
+ # workaround for https://github.com/espressomd/espresso/issues/4943
+ $<$<COMPILE_LANGUAGE:CXX>:$<$<CONFIG:Coverage>:--coverage -fprofile-abs-path>>
)
function(espresso_add_gpu_library)
add_library(${ARGV})
set(TARGET_NAME ${ARGV0})
set_target_properties(${TARGET_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
- target_link_libraries(${TARGET_NAME} PRIVATE espresso::cuda_flags)
+ target_link_libraries(${TARGET_NAME} PRIVATE espresso::cuda_flags $<$<CONFIG:Coverage>:gcov>)
+endfunction()
+
+function(espresso_add_gpu_executable)
+ add_executable(${ARGV})
+ set(TARGET_NAME ${ARGV0})
+ set_target_properties(${TARGET_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+ target_link_libraries(${TARGET_NAME} PRIVATE espresso::cuda_flags $<$<CONFIG:Coverage>:gcov>)
endfunction()
include(FindPackageHandleStandardArgs)
diff --git a/cmake/FindCython.cmake b/cmake/FindCython.cmake
index f0373c6217e..ccfc6903a1f 100644
--- a/cmake/FindCython.cmake
+++ b/cmake/FindCython.cmake
@@ -35,11 +35,11 @@
# Use the Cython executable that lives next to the Python executable
# if it is a local installation.
if(Python_EXECUTABLE)
- get_filename_component(_python_path ${Python_EXECUTABLE} PATH)
+ cmake_path(GET Python_EXECUTABLE PARENT_PATH _python_path)
elseif(Python3_EXECUTABLE)
- get_filename_component(_python_path ${Python3_EXECUTABLE} PATH)
+ cmake_path(GET Python3_EXECUTABLE PARENT_PATH _python_path)
elseif(DEFINED PYTHON_EXECUTABLE)
- get_filename_component(_python_path ${PYTHON_EXECUTABLE} PATH)
+ cmake_path(GET PYTHON_EXECUTABLE PARENT_PATH _python_path)
endif()
if(DEFINED _python_path)
diff --git a/cmake/espresso_resource_files.cmake b/cmake/espresso_resource_files.cmake
new file mode 100644
index 00000000000..6daae13c6c4
--- /dev/null
+++ b/cmake/espresso_resource_files.cmake
@@ -0,0 +1,44 @@
+#
+# Copyright (C) 2024 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+define_property(TARGET PROPERTY EspressoResourceFiles
+ BRIEF_DOCS "List of resource files to be deployed with target")
+
+# Register resource files (Python files, text files, etc.) that need to be
+# deployed alongside a target. If the file exists in the project source
+# directory, it is configured with COPYONLY. If not, it is assumed to be a
+# generated file.
+function(espresso_target_resources)
+ list(POP_FRONT ARGV TARGET_NAME)
+ foreach(RESOURCE_RELPATH ${ARGV})
+ if(IS_ABSOLUTE ${RESOURCE_RELPATH})
+ message(
+ FATAL_ERROR
+ "function espresso_target_resources() only supports relative paths, could not process \"${RESOURCE_RELPATH}\""
+ )
+ endif()
+ set(RESOURCE_SOURCE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_RELPATH}")
+ set(RESOURCE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_RELPATH}")
+ if(EXISTS ${RESOURCE_SOURCE_PATH})
+ configure_file(${RESOURCE_SOURCE_PATH} ${RESOURCE_BINARY_PATH} COPYONLY)
+ endif()
+ set_property(TARGET ${TARGET_NAME} APPEND
+ PROPERTY EspressoResourceFiles "${RESOURCE_BINARY_PATH}")
+ endforeach()
+endfunction()
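+
+# Usage sketch (hypothetical file names, for illustration only): register two
+# resource files on a target, then read back the accumulated list:
+#   espresso_target_resources(espressomd __init__.py data/defaults.txt)
+#   get_target_property(RESOURCE_FILES espressomd EspressoResourceFiles)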
diff --git a/cmake/unit_test.cmake b/cmake/espresso_unit_test.cmake
similarity index 74%
rename from cmake/unit_test.cmake
rename to cmake/espresso_unit_test.cmake
index 535d5ac80ca..c857d9bed13 100644
--- a/cmake/unit_test.cmake
+++ b/cmake/espresso_unit_test.cmake
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2016-2022 The ESPResSo project
+# Copyright (C) 2016-2024 The ESPResSo project
#
# This file is part of ESPResSo.
#
@@ -17,10 +17,17 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-# unit_test function
-function(UNIT_TEST)
- cmake_parse_arguments(TEST "" "NAME;NUM_PROC" "SRC;DEPENDS" ${ARGN})
- add_executable(${TEST_NAME} ${TEST_SRC})
+function(ESPRESSO_UNIT_TEST)
+ cmake_parse_arguments(TEST "" "SRC;NAME;NUM_PROC" "DEPENDS" ${ARGN})
+ if(NOT DEFINED TEST_NAME)
+ cmake_path(GET TEST_SRC STEM TEST_NAME)
+ set(TEST_NAME ${TEST_NAME} PARENT_SCOPE)
+ endif()
+ if(${TEST_SRC} MATCHES ".*\\.cu$")
+ espresso_add_gpu_executable(${TEST_NAME} ${TEST_SRC})
+ else()
+ add_executable(${TEST_NAME} ${TEST_SRC})
+ endif()
# Build tests only when testing
set_target_properties(${TEST_NAME} PROPERTIES EXCLUDE_FROM_ALL ON)
set_target_properties(${TEST_NAME} PROPERTIES CXX_CLANG_TIDY "${ESPRESSO_CXX_CLANG_TIDY}")
@@ -29,7 +36,15 @@ function(UNIT_TEST)
target_link_libraries(${TEST_NAME} PRIVATE ${TEST_DEPENDS})
endif()
target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/src/core)
- target_link_libraries(${TEST_NAME} PRIVATE espresso::config espresso::cpp_flags)
+ if(ESPRESSO_BUILD_WITH_COVERAGE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ target_compile_options(
+ ${TEST_NAME} PRIVATE -fno-default-inline -fno-elide-constructors)
+ endif()
+ if(${TEST_SRC} MATCHES ".*\\.cu$")
+ target_link_libraries(${TEST_NAME} PRIVATE espresso::config CUDA::cuda_driver CUDA::cudart)
+ else()
+ target_link_libraries(${TEST_NAME} PRIVATE espresso::config espresso::cpp_flags)
+ endif()
# If NUM_PROC is given, set up MPI parallel test case
if(TEST_NUM_PROC)
@@ -60,4 +75,4 @@ function(UNIT_TEST)
${TEST_NAME} PROPERTIES ENVIRONMENT "${TEST_ENV_VARIABLES}")
add_dependencies(check_unit_tests ${TEST_NAME})
-endfunction(UNIT_TEST)
+endfunction()
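+
+# Usage sketch (hypothetical sources and dependencies, for illustration only):
+# the test name defaults to the source file stem, and .cu sources are routed
+# to the GPU toolchain:
+#   espresso_unit_test(SRC Vector_test.cpp DEPENDS espresso::utils)
+#   espresso_unit_test(SRC gpu_interface_test.cu NUM_PROC 2 DEPENDS Boost::mpi)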
diff --git a/doc/bibliography.bib b/doc/bibliography.bib
index c9423a91904..77c61e8901a 100644
--- a/doc/bibliography.bib
+++ b/doc/bibliography.bib
@@ -239,21 +239,6 @@ @Article{brown95a
publisher={Taylor \& Francis},
}
-@InCollection{burtscher11a,
-author = {Burtscher, Martin and Pingali, Keshav},
-chapter = {6},
-title = {An efficient {CUDA} implementation of the tree-based {B}arnes {H}ut n-body algorithm},
-editor = {Hwu, Wen-mei W.},
-booktitle = {{GPU} Computing Gems Emerald Edition},
-publisher = {Morgan Kaufmann},
-address = {Boston},
-pages = {75--92},
-year = {2011},
-series = {Applications of GPU Computing Series},
-isbn = {978-0-12-384988-5},
-doi = {10.1016/B978-0-12-384988-5.00006-1},
-}
-
@Article{cerda08d,
title = {{P3M} algorithm for dipolar interactions},
author = {Cerd\`{a}, Juan J. and Ballenegger, Vincent and Lenz, Olaf and Holm, Christian},
@@ -925,17 +910,6 @@ @Article{plimpton95a
publisher={Elsevier}
}
-@Article{polyakov13a,
-author = {Polyakov, A. Yu. and Lyutyy, T. V. and Denisov, S. and Reva, V. V. and H\"{a}nggi, P.},
-title = {Large-scale ferrofluid simulations on graphics processing units},
-journal = {Computer Physics Communications},
-year = {2013},
-volume = {184},
-number = {6},
-pages = {1483--1489},
-doi = {10.1016/j.cpc.2013.01.016},
-}
-
@Book{pottier10a,
title={Nonequilibrium Statistical Physics},
subtitle={Linear Irreversible Processes},
diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
index e4cc02ded2e..67333f21afd 100644
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.17
+# Doxyfile 1.9.8
@INCLUDE = "@CMAKE_CURRENT_BINARY_DIR@/doxy-features"
@@ -95,14 +95,6 @@ ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
-# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all generated output in the proper direction.
-# Possible values are: None, LTR, RTL and Context.
-# The default value is: None.
-
-OUTPUT_TEXT_DIRECTION = None
-
# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
# descriptions after the members that are listed in the file and class
# documentation (similar to Javadoc). Set to NO to disable this.
@@ -424,6 +416,14 @@ TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
+# If the TIMESTAMP tag is set different from NO then each generated page will
+# contain the date or date and time when the page was generated. Setting this to
+# NO can help when comparing the output of multiple runs.
+# Possible values are: YES, NO, DATETIME and DATE.
+# The default value is: NO.
+
+TIMESTAMP = YES
+
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
@@ -1202,15 +1202,6 @@ HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to YES can help to show when doxygen was last run and thus if the
-# documentation is up to date.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP = YES
-
# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
# documentation will contain sections that can be hidden and shown after the
# page has loaded.
@@ -1481,17 +1472,6 @@ EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT = YES
-
# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# https://www.mathjax.org) which uses client side JavaScript for the rendering
# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
@@ -1774,16 +1754,6 @@ LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE = NO
-
# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
# bibliography, e.g. plainnat, or ieeetr. See
# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
@@ -1792,14 +1762,6 @@ LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plainnat
-# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_TIMESTAMP = NO
-
#---------------------------------------------------------------------------
# Configuration options related to the RTF output
#---------------------------------------------------------------------------
@@ -1856,16 +1818,6 @@ RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
-# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
-# with syntax highlighting in the RTF output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_SOURCE_CODE = NO
-
#---------------------------------------------------------------------------
# Configuration options related to the man page output
#---------------------------------------------------------------------------
@@ -1955,15 +1907,6 @@ GENERATE_DOCBOOK = NO
DOCBOOK_OUTPUT = docbook
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
-# program listings (including syntax highlighting and cross-referencing
-# information) to the DOCBOOK output. Note that enabling this will significantly
-# increase the size of the DOCBOOK output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_PROGRAMLISTING = NO
-
#---------------------------------------------------------------------------
# Configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
@@ -2143,25 +2086,9 @@ EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
+# Configuration options related to diagram generator tools
#---------------------------------------------------------------------------
-# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS = YES
-
-# You can include diagrams made with dia in doxygen documentation. Doxygen will
-# then run dia to produce the diagram and insert it in the documentation. The
-# DIA_PATH tag allows you to specify the directory where the dia binary resides.
-# If left empty dia is assumed to be found in the default search path.
-
-DIA_PATH =
-
# If set to YES the inheritance and collaboration graphs will hide inheritance
# and usage relations if the target is undocumented or is not a class.
# The default value is: YES.
@@ -2187,23 +2114,6 @@ HAVE_DOT = $(HAVE_DOT)
DOT_NUM_THREADS = 0
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTNAME = Helvetica
-
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTSIZE = 10
-
# By default doxygen will tell dot to use the default font as specified with
# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
# the path where dot can find it using this tag.
@@ -2418,18 +2328,6 @@ DOT_GRAPH_MAX_NODES = 100
MAX_DOT_GRAPH_DEPTH = 0
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT = YES
-
# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
# files in one run (i.e. multiple -o and -T options on the command line). This
# makes dot run faster, but since only newer versions of dot (>1.8.10) support
diff --git a/doc/sphinx/CMakeLists.txt b/doc/sphinx/CMakeLists.txt
index 7dc5dc3a0bb..676a0e9dffd 100644
--- a/doc/sphinx/CMakeLists.txt
+++ b/doc/sphinx/CMakeLists.txt
@@ -72,11 +72,11 @@ if(SPHINX_FOUND)
"${CMAKE_CURRENT_SOURCE_DIR}/bibliography.rst")
foreach(file ${FILE_LIST})
- get_filename_component(basename ${file} NAME)
+ cmake_path(GET file FILENAME basename)
configure_file(${file} ${CMAKE_CURRENT_BINARY_DIR}/${basename} COPYONLY)
endforeach()
- configure_file(${CMAKE_SOURCE_DIR}/doc/bibliography.bib
- ${CMAKE_CURRENT_BINARY_DIR}/bibliography.bib COPYONLY)
+ configure_file("${CMAKE_SOURCE_DIR}/doc/bibliography.bib"
+ "${CMAKE_CURRENT_BINARY_DIR}/bibliography.bib" COPYONLY)
# Place files to be excluded from SPHINX documentation
set(EXCLUDE "${SPHINX_PYTHON_DIR}/gen_code_info.py"
diff --git a/doc/sphinx/advanced_methods.rst b/doc/sphinx/advanced_methods.rst
index b18b813281d..a4553a0a138 100644
--- a/doc/sphinx/advanced_methods.rst
+++ b/doc/sphinx/advanced_methods.rst
@@ -126,43 +126,6 @@ Several modes are available for different types of binding.
part_type_to_be_glued=3,
part_type_after_glueing=4)
-* ``"bind_three_particles"`` allows for the creation of agglomerates which maintain
- their shape similarly to those create by the mode ``"bind_at_point_of_collision"``.
- The present approach works without virtual sites. Instead, for each two-particle
- collision, the surrounding is searched for a third particle. If one is found,
- angular bonds are placed to maintain the local shape.
- If all three particles are within the cutoff distance, an angle bond is added
- on each of the three particles in addition
- to the distance based bonds between the particle centers.
- If two particles are within the cutoff of a central particle (e.g., chain of three particles)
- an angle bond is placed on the central particle.
- The angular bonds being added are determined from the angle between the particles.
- This method does not depend on the particles' rotational
- degrees of freedom being integrated. Virtual sites are not required.
- The method, along with the corresponding bonds are setup as follows::
-
- first_angle_bond_id = 0
- n_angle_bonds = 181 # 0 to 180 degrees in one degree steps
- for i in range(0, n_angle_bonds, 1):
- bond_id = first_angle_bond_id + i
- system.bonded_inter[bond_id] = espressomd.interactions.AngleHarmonic(
- bend=1., phi0=float(i) / float(n_angle_bonds - 1) * np.pi)
-
- bond_centers = espressomd.interactions.HarmonicBond(k=1., r_0=0.1, r_cut=0.5)
- system.bonded_inter.add(bond_centers)
-
- system.collision_detection.set_params(
- mode="bind_three_particles",
- bond_centers=bond_centers,
- bond_three_particles=first_angle_bond_id,
- three_particle_binding_angle_resolution=n_angle_bonds,
- distance=0.1)
-
- Important: The bonds for the angles are mapped via their numerical bond ids.
- In this example, ids from 0 to 180 are used. All other bonds required for
- the simulation need to be added to the system after those bonds. In particular,
- this applies to the bonded interaction passed via ``bond_centers``
-
The following limitations currently apply for the collision detection:
@@ -1301,8 +1264,7 @@ In |es|, the basic ingredients to simulate such a system are split into three bo
The system-wide thermostat has to be applied to the centre of mass and not to
the core particle directly. Therefore, the particles have to be excluded from
global thermostatting. With ``THERMOSTAT_PER_PARTICLE`` enabled, we set the
-friction coefficient of the Drude complex to zero, which allows
-to still use a global Langevin thermostat for non-polarizable particles.
+friction coefficient of the Drude complex to zero.
As the Drude charge should not alter the *charge* or *mass* of the Drude
complex, both properties have to be subtracted from the core when adding the
@@ -1313,9 +1275,11 @@ polarizability :math:`\alpha` (in units of inverse volume) with :math:`q_d =
The following helper method takes into account all the preceding considerations
and can be used to conveniently add a Drude particle to a given core particle.
-It returns an `espressomd.particle_data.ParticleHandle` to the created Drude
+It returns a :class:`~espressomd.particle_data.ParticleHandle` of the created Drude
particle. Note that as the function also adds the first two bonds between Drude
-and core, these bonds have to be already available.::
+and core, these bonds must already be available.
+
+.. code-block::
import espressomd.drude_helpers
dh = espressomd.drude_helpers.DrudeHelpers()
diff --git a/doc/sphinx/electrostatics.rst b/doc/sphinx/electrostatics.rst
index e3a5e39e417..8f6713cbcc8 100644
--- a/doc/sphinx/electrostatics.rst
+++ b/doc/sphinx/electrostatics.rst
@@ -342,10 +342,6 @@ MMM1D
:class:`espressomd.electrostatics.MMM1D`
-.. note::
- Required features: ``ELECTROSTATICS`` for MMM1D, the GPU version
- additionally needs the features ``CUDA`` and ``MMM1D_GPU``.
-
Please cite :cite:`arnold05b` when using MMM1D. See :ref:`MMM1D theory` for
the details.
@@ -369,34 +365,6 @@ change the value of the ``timings`` argument of the
:class:`~espressomd.electrostatics.MMM1D` class,
which controls the number of test force calculations.
-.. _MMM1D on GPU:
-
-MMM1D on GPU
-~~~~~~~~~~~~
-
-:class:`espressomd.electrostatics.MMM1DGPU`
-
-MMM1D is also available in a GPU implementation. Unlike its CPU
-counterpart, it does not need the N-squared cell system.
-
-::
-
- import espressomd.electrostatics
- mmm1d = espressomd.electrostatics.MMM1DGPU(prefactor=C, far_switch_radius=fr,
- maxPWerror=err, tune=False, bessel_cutoff=bc)
- mmm1d = espressomd.electrostatics.MMM1DGPU(prefactor=C, maxPWerror=err)
-
-The first form sets parameters manually. The switch radius determines at which
-xy-distance the force calculation switches from the near to the far
-formula. If the Bessel cutoff is not explicitly given, it is determined
-from the maximal pairwise error, otherwise this error only counts for
-the near formula. The second tuning form just takes the maximal pairwise
-error and tries out a lot of switching radii to find out the fastest one.
-
-For details on the MMM family of algorithms, refer to appendix
-:ref:`The MMM family of algorithms`.
-
-
.. _ScaFaCoS electrostatics:
ScaFaCoS electrostatics
diff --git a/doc/sphinx/installation.rst b/doc/sphinx/installation.rst
index 46ba6fbe7ee..6b86106b8b2 100644
--- a/doc/sphinx/installation.rst
+++ b/doc/sphinx/installation.rst
@@ -25,7 +25,7 @@ performance of the code. Therefore it is not possible to build a single
binary that can satisfy all needs. For performance reasons a user
should always activate only those features that are actually needed.
This means, however, that learning how to compile is a necessary evil.
-The build system of |es| uses CMake [4]_ to compile
+The build system of |es| uses CMake to compile
software easily on a wide range of platforms.
Users who only need a "default" installation of |es| and have an account
@@ -45,10 +45,10 @@ are required to be able to compile and use |es|:
.. glossary::
CMake
- The build system is based on CMake.
+ The build system is based on CMake version 3 or later [4]_.
C++ compiler
- The C++ core of |es| needs to be built by a C++17-capable compiler.
+ The C++ core of |es| needs to be built by a C++20-capable compiler.
Boost
A number of advanced C++ features used by |es| are provided by Boost.
@@ -58,6 +58,11 @@ are required to be able to compile and use |es|:
For some algorithms like P\ :math:`^3`\ M, |es| needs the FFTW library
version 3 or later [5]_ for Fourier transforms, including header files.
+ CUDA
+ For some algorithms like P\ :math:`^3`\ M,
+ |es| provides GPU-accelerated implementations for NVIDIA GPUs.
+ We strongly recommend CUDA 12.0 or later [6]_.
+
MPI
An MPI library that implements the MPI standard version 1.2 is required
to run simulations in parallel. |es| is currently tested against
@@ -79,22 +84,43 @@ are required to be able to compile and use |es|:
Python
|es|'s main user interface relies on Python 3.
+ We strongly recommend using Python environments to isolate
+ packages required by |es| from packages installed system-wide.
+ This can be achieved using venv [7]_, conda [8]_, or any similar tool.
+ Inside an environment, commands of the form
+ ``sudo apt install python3-numpy python3-scipy``
+ can be rewritten as ``python3 -m pip install numpy scipy``,
+ and thus do not require root privileges.
+
+ Depending on your needs, you may choose to install all |es|
+ dependencies inside the environment, or only the subset of
+ dependencies not already satisfied by your workstation or cluster.
+ For the exact syntax to create and configure an environment,
+ please refer to the tool documentation.
+
Cython
Cython is used for connecting the C++ core to Python.
+ Python environment tools may allow you to install a Python executable
+ that is more recent than the system-wide Python executable.
+ Be aware that this might lead to compatibility issues if Cython
+ accidentally picks up the system-wide :file:`Python.h` header file.
+ In that scenario, you will have to manually adapt the C++ compiler
+ include paths to find the correct :file:`Python.h` header file.
+
.. _Installing requirements on Ubuntu Linux:
Installing requirements on Ubuntu Linux
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-To compile |es| on Ubuntu 22.04 LTS, install the following dependencies:
+To compile |es| on Ubuntu 24.04 LTS, install the following dependencies:
.. code-block:: bash
- sudo apt install build-essential cmake cython3 python3-pip python3-numpy \
- libboost-all-dev openmpi-common fftw3-dev libfftw3-mpi-dev libhdf5-dev libhdf5-openmpi-dev \
- python3-scipy python3-opengl libgsl-dev freeglut3
+ sudo apt install build-essential cmake cython3 python3-dev openmpi-bin \
+ libboost-all-dev fftw3-dev libfftw3-mpi-dev libhdf5-dev libhdf5-openmpi-dev \
+ python3-pip python3-numpy python3-scipy python3-opengl libgsl-dev freeglut3
Optionally the ccmake utility can be installed for easier configuration:
@@ -120,7 +146,7 @@ paths before building the project, for example via environment variables:
.. code-block:: bash
- export CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-11.5"
+ export CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-12.0"
export PATH="${CUDA_TOOLKIT_ROOT_DIR}/bin${PATH:+:$PATH}"
export LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
@@ -130,41 +156,41 @@ to activate CUDA. These commands may need to be adapted depending on which
operating system and CUDA version you are using.
You can control the list of CUDA architectures to generate device code for.
-For example, ``-D CMAKE_CUDA_ARCHITECTURES=61;75`` will generate device code
-for both sm_61 and sm_75 architectures.
+For example, ``CUDAARCHS="61;75" cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON``
+will generate device code for both sm_61 and sm_75 architectures.
-On Ubuntu 22.04, the default GCC compiler is too recent for nvcc and will fail
-to compile sources that rely on ``std::function``. You can either use GCC 10:
+On Ubuntu 24.04, the default GCC compiler may be too recent for nvcc.
+You can either use GCC 12:
.. code-block:: bash
- CC=gcc-10 CXX=g++-10 CUDACXX=/usr/local/cuda-11.5/bin/nvcc cmake .. \
+ CC=gcc-12 CXX=g++-12 CUDACXX=/usr/local/cuda-12.0/bin/nvcc cmake .. \
-D ESPRESSO_BUILD_WITH_CUDA=ON \
- -D CUDAToolkit_ROOT=/usr/local/cuda-11.5 \
- -D CMAKE_CUDA_FLAGS="--compiler-bindir=/usr/bin/g++-10"
+ -D CUDAToolkit_ROOT=/usr/local/cuda-12.0 \
+ -D CMAKE_CUDA_FLAGS="--compiler-bindir=/usr/bin/g++-12"
-or alternatively install Clang 14 as a replacement for nvcc and GCC:
+or alternatively install Clang 18 as a replacement for nvcc and GCC:
.. code-block:: bash
- CC=clang-14 CXX=clang++-14 CUDACXX=clang++-14 cmake .. \
+ CC=clang-18 CXX=clang++-18 CUDACXX=clang++-18 cmake .. \
-D ESPRESSO_BUILD_WITH_CUDA=ON \
- -D CUDAToolkit_ROOT=/usr/local/cuda-11.5 \
- -D CMAKE_CXX_FLAGS="-I/usr/include/x86_64-linux-gnu/c++/10 -I/usr/include/c++/10 --cuda-path=/usr/local/cuda-11.5" \
- -D CMAKE_CUDA_FLAGS="-I/usr/include/x86_64-linux-gnu/c++/10 -I/usr/include/c++/10 --cuda-path=/usr/local/cuda-11.5"
+ -D CUDAToolkit_ROOT=/usr/local/cuda-12.0 \
+ -D CMAKE_CXX_FLAGS="-I/usr/include/x86_64-linux-gnu/c++/12 -I/usr/include/c++/12 --cuda-path=/usr/local/cuda-12.0" \
+ -D CMAKE_CUDA_FLAGS="-I/usr/include/x86_64-linux-gnu/c++/12 -I/usr/include/c++/12 --cuda-path=/usr/local/cuda-12.0"
Please note that all CMake options and compiler flags that involve
``/usr/local/cuda-*`` need to be adapted to your CUDA environment.
But they are only necessary on systems with multiple CUDA releases installed,
and can be safely removed if you have only one CUDA release installed.
-Please also note that with Clang, you still need the GCC 10 toolchain,
-which can be set up with ``apt install gcc-10 g++-10 libstdc++-10-dev``.
+Please also note that with Clang, you still need the GCC 12 toolchain,
+which can be set up with ``apt install gcc-12 g++-12 libstdc++-12-dev``.
The extra compiler flags in the Clang CMake command above are needed to pin
the search paths of Clang. By default, it searches through the most recent
-GCC version, which is GCC 12 on Ubuntu 22.04. It is not possible to install
-the NVIDIA driver without GCC 12 due to a dependency resolution issue
-(``nvidia-dkms`` depends on ``dkms`` which depends on ``gcc-12``).
+GCC version, which is GCC 13 on Ubuntu 24.04. It is not possible to install
+the NVIDIA driver without GCC 13 due to a dependency resolution issue
+(``nvidia-dkms`` depends on ``dkms`` which depends on ``gcc-13``).
.. _Requirements for building the documentation:
@@ -255,11 +281,11 @@ Installing requirements on Windows via WSL
To run |es| on Windows, use the Linux subsystem. For that you need to
-* follow `these instructions `__ to install Ubuntu
-* start Ubuntu (or open an Ubuntu tab in `Windows Terminal `__)
+* follow `these instructions `__ to install Ubuntu
+* start Ubuntu (or open an Ubuntu tab in `Windows Terminal `__)
* execute ``sudo apt update`` to prepare the installation of dependencies
* optional step: If you have an NVIDIA graphics card available and want to make
- use of |es|'s GPU acceleration, follow `these instructions `__
+ use of |es|'s GPU acceleration, follow `these instructions `__
to set up CUDA.
* follow the instructions for :ref:`Installing requirements on Ubuntu Linux`
@@ -409,10 +435,6 @@ General features
.. seealso:: :ref:`Electrostatics`
-- ``MMM1D_GPU``: This enables MMM1D on GPU. It is faster than the CPU version
- by several orders of magnitude, but has float precision instead of double
- precision.
-
- ``MMM1D_MACHINE_PREC``: This enables high-precision Bessel functions
for MMM1D on CPU. Comes with a 60% slow-down penalty. The low-precision
functions are enabled by default and are precise enough for most applications.
@@ -807,12 +829,13 @@ When an option is enabled, additional options may become available.
For example with ``-D ESPRESSO_BUILD_TESTS=ON``, one can specify
the CTest parameters with ``-D ESPRESSO_CTEST_ARGS=-j$(nproc)``.
-Environment variables can be passed to CMake. For example, to select Clang, use
-``CC=clang CXX=clang++ CUDACXX=clang++ cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON``.
-If you have multiple versions of the CUDA library installed, you can select the
-correct one with ``CUDA_BIN_PATH=/usr/local/cuda-11.5 cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON``
-(with Clang as the CUDA compiler, you also need to override its default CUDA
-path with ``-D CMAKE_CUDA_FLAGS=--cuda-path=/usr/local/cuda-11.5``).
+Environment variables can be passed to CMake. For example, to select the Clang
+compiler and specify which GPU architectures to generate device code for, use
+``CC=clang CXX=clang++ CUDACXX=clang++ CUDAARCHS="61;75" cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON``.
+When multiple versions of the CUDA library are available, the correct one can be
+selected with ``CUDA_BIN_PATH=/usr/local/cuda-12.0 cmake .. -D ESPRESSO_BUILD_WITH_CUDA=ON``
+(with Clang as the CUDA compiler, it is also necessary to override its default
+CUDA path with ``-D CMAKE_CUDA_FLAGS=--cuda-path=/usr/local/cuda-12.0``).
.. _Build types and compiler flags:
@@ -990,3 +1013,12 @@ ____
.. [5]
https://www.fftw.org/
+
+.. [6]
+ https://docs.nvidia.com/cuda/
+
+.. [7]
+ https://docs.python.org/3/library/venv.html
+
+.. [8]
+ https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html
diff --git a/doc/sphinx/integration.rst b/doc/sphinx/integration.rst
index 2c82f184043..6802e59d086 100644
--- a/doc/sphinx/integration.rst
+++ b/doc/sphinx/integration.rst
@@ -9,7 +9,7 @@ Particle integration and propagation
------------------------------------
The main integration scheme of |es| is the velocity Verlet algorithm.
-A steepest descent algorithm is used to minimize the system.
+A steepest descent algorithm is used to minimize forces and torques in the system.
Additional integration schemes are available, which can be coupled to
thermostats to enable Langevin dynamics, Brownian dynamics, Stokesian dynamics,
@@ -21,109 +21,107 @@ Integrators
-----------
To run the integrator call the method
-:meth:`system.integrate.run() <espressomd.integrate.Integrator.run>`::
+:meth:`system.integrator.run() <espressomd.integrate.Integrator.run>`::
system.integrator.run(number_of_steps, recalc_forces=False, reuse_forces=False)
where ``number_of_steps`` is the number of time steps the integrator should perform.
-.. _Velocity Verlet Algorithm:
+The following sections detail the different integrators available.
+
+.. _Velocity Verlet algorithm:
Velocity Verlet algorithm
^^^^^^^^^^^^^^^^^^^^^^^^^
-:meth:`espressomd.integrate.IntegratorHandle.set_vv`
+The velocity Verlet integrator is active by default.
+If you used a different integrator and want to switch back, use
+:meth:`system.integrator.set_vv() <espressomd.integrate.IntegratorHandle.set_vv>`.
+
+The velocity Verlet algorithm is used for equations of motion of the general form
-The equations of motion for the trajectory of point-like particles read
+.. math::
-.. math:: \dot v_i(t) = F_i(\{x_j\},v_i,t)/m_i \\ \dot x_i(t) = v_i(t),
+ \begin{aligned}
+ \dot{\vec{x}}_i(t) &= \vec{v}_i(t), \\
+ \dot{\vec{v}}_i(t) &= \frac{\vec{F}_i(\{ \vec{x}_j \} ,\vec{v}_i,t)}{m_i},
+ \end{aligned}
-where :math:`x_i`, :math:`v_i`, :math:`m_i` are position, velocity and mass of
-particle :math:`i` and :math:`F_i(\{x_j\},v_i,t)` the forces acting on it.
-These forces comprise all interactions with other particles and external fields
-as well as non-deterministic contributions described in :ref:`Thermostats`.
+where :math:`\vec{x}_i`, :math:`\vec{v}_i`, :math:`m_i` are position, velocity and mass of
+particle :math:`i` and :math:`\vec{F}_i(\{\vec{x}_j\},\vec{v}_i,t)` the forces acting on it.
+The force :math:`\vec{F}_i` comprises all interactions of particle :math:`i` with other particles :math:`j` and external fields
+as well as contributions from thermostats, see :ref:`Thermostats`.
-For numerical integration, this equation is discretized to the following steps (:cite:`rapaport04a` eqs. 3.5.8 - 3.5.10):
+For numerical integration, the equation of motion is discretized to the following steps (:cite:`rapaport04a` eqs. 3.5.8 - 3.5.10):
1. Calculate the velocity at the half step
- .. math:: v(t+dt/2) = v(t) + \frac{F(x(t),v(t-dt/2),t)}{m} dt/2
+ .. math:: \vec{v}(t+dt/2) = \vec{v}(t) + \frac{\vec{F}(\vec{x}(t),\vec{v}(t-dt/2),t)}{m} dt/2
2. Calculate the new position
- .. math:: x(t+dt) = x(t) + v(t+dt/2) dt
+ .. math:: \vec{x}(t+dt) = \vec{x}(t) + \vec{v}(t+dt/2) dt
3. Calculate the force based on the new position
- .. math:: F = F(x(t+dt), v(t+dt/2), t+dt)
+ .. math:: \vec{F} = \vec{F}(\vec{x}(t+dt), \vec{v}(t+dt/2), t+dt)
4. Calculate the new velocity
- .. math:: v(t+dt) = v(t+dt/2) + \frac{F(x(t+dt),t+dt)}{m} dt/2
+ .. math:: \vec{v}(t+dt) = \vec{v}(t+dt/2) + \frac{\vec{F}(\vec{x}(t+dt), \vec{v}(t+dt/2), t+dt)}{m} dt/2
+
+Here, for simplicity, we have omitted the particle index :math:`i`.
+Read, e.g., :math:`\vec{x}` as the positions of all particles.
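+
+For illustration only (this is not the |es| API, and ``force`` is a hypothetical
+user-supplied function of the positions), the four steps map onto the
+following sketch::
+
+    def velocity_verlet_step(x, v, f, m, dt, force):
+        v_half = v + 0.5 * dt * f / m          # step 1: velocity at the half step
+        x_new = x + dt * v_half                # step 2: new position
+        f_new = force(x_new)                   # step 3: force at the new position
+        v_new = v_half + 0.5 * dt * f_new / m  # step 4: new velocity
+        return x_new, v_new, f_new
+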
Note that this implementation of the velocity Verlet algorithm reuses
forces in step 1. That is, they are computed once in step 3,
-but used twice, in step 4 and in step 1 of the next iteration. In the first time
-step after setting up, there are no forces present yet. Therefore, |es| has
+but used twice, in step 4 and in step 1 of the next iteration.
+The first time the integrator is called, there are no forces present yet.
+Therefore, |es| has
to compute them before the first time step. That has two consequences:
-first, random forces are redrawn, resulting in a narrower distribution
-of the random forces, which we compensate by stretching. Second,
-coupling forces of e.g. the lattice-Boltzmann fluid cannot be computed
+first, if thermostats are active, random forces are computed twice during
+the first time step, resulting in a narrower distribution of the random forces.
+Second,
+coupling forces of, e.g., the lattice-Boltzmann fluid cannot be computed
and are therefore lacking in the first half time step. In order to
minimize these effects, |es| has a quite conservative heuristics to decide
-whether a change makes it necessary to recompute forces before the first
-time step. Therefore, calling 100 times
-:meth:`espressomd.integrate.Integrator.run` with ``steps=1`` does the
-same as with ``steps=100``, apart from some small calling overhead.
-
-However, for checkpointing, there is no way for |es| to tell that the forces
-that you read back in actually match the parameters that are set.
-Therefore, |es| would recompute the forces before the first time step, which
-makes it essentially impossible to checkpoint LB simulations, where it
-is vital to keep the coupling forces. To work around this, there is
-an additional parameter ``reuse_forces``, which tells integrate to not recalculate
-the forces for the first time step, but use that the values still stored
-with the particles. Use this only if you are absolutely sure that the
-forces stored match your current setup!
-
-The opposite problem occurs when timing interactions: In this case, one
-would like to recompute the forces, despite the fact that they are
-already correctly calculated. To this aim, the option ``recalc_forces`` can be used to
-enforce force recalculation.
+whether a change makes it necessary to recompute forces before the first time step.
+Therefore, calling
+:meth:`espressomd.integrate.Integrator.run` 100 times with ``steps=1`` is equivalent to calling it once with ``steps=100``.
+
+When resuming a simulation, you can either reuse the forces stored on the particles (``reuse_forces=True``) or recalculate them from the current configuration (``reuse_forces=False``).
+Setting ``reuse_forces=True`` is useful when restarting a simulation from a checkpoint, to obtain exactly the same result as if the integration had continued without interruption.
+You can also use ``recalc_forces=True`` to recalculate forces even if they are already correctly computed.
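+
+For example, when resuming from a checkpoint, the call could look like::
+
+    system.integrator.run(100, reuse_forces=True)
+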
.. _Isotropic NpT integrator:
Isotropic NpT integrator
^^^^^^^^^^^^^^^^^^^^^^^^
-:meth:`espressomd.integrate.IntegratorHandle.set_isotropic_npt`
+Simulations in the NpT ensemble are performed with the isotropic NpT integrator, which is activated with :meth:`~espressomd.integrate.IntegratorHandle.set_isotropic_npt`.
+A code snippet would look like::
-As the NpT thermostat alters the way the equations of motion are integrated, it is
-discussed here and only a brief summary is given in :ref:`Thermostats`.
+ import espressomd
-To activate the NpT integrator, use :meth:`~espressomd.integrate.IntegratorHandle.set_isotropic_npt`
-with parameters:
+ system = espressomd.System(box_l=[1, 1, 1])
+ system.thermostat.set_npt(kT=1.0, gamma0=1.0, gammav=1.0, seed=42)
+ system.integrator.set_isotropic_npt(ext_pressure=1.0, piston=1.0)
+
+The parameters of the integrator are:
* ``ext_pressure``: The external pressure
* ``piston``: The mass of the applied piston
* ``direction``: Flags to enable/disable box dimensions to be subject to fluctuations. By default, all directions are enabled.
-Additionally, a NpT thermostat has to be set by :meth:`~espressomd.thermostat.Thermostat.set_npt()`
+Additionally, an NpT thermostat has to be set by :meth:`~espressomd.thermostat.Thermostat.set_npt()`
with parameters:
* ``kT``: Thermal energy of the heat bath
* ``gamma0``: Friction coefficient of the bath
* ``gammav``: Artificial friction coefficient for the volume fluctuations.
-A code snippet would look like::
-
- import espressomd
-
- system = espressomd.System(box_l=[1, 1, 1])
- system.thermostat.set_npt(kT=1.0, gamma0=1.0, gammav=1.0, seed=42)
- system.integrator.set_isotropic_npt(ext_pressure=1.0, piston=1.0)
-
-The physical meaning of these parameters is described below:
+The physical meaning of these parameters and the equations of motion are described below.
+We recommend reading :ref:`Langevin thermostat` before continuing.
The relaxation towards a desired pressure :math:`P` (parameter ``ext_pressure``)
is enabled by treating the box
@@ -138,12 +136,12 @@ associated with the volume is postulated. This results in a "force" on the box s
where
-.. math:: \mathcal{P} = \frac{1}{Vd} \sum_{i,j} f_{ij}x_{ij} + \frac{1}{Vd} \sum_i m_i v_i^2
+.. math:: \mathcal{P} = \frac{1}{Vd} \sum_{i,j} \vec{f}_{ij} \cdot \vec{x}_{ij} + \frac{1}{Vd} \sum_i m_i v_i^2 ,
-Here :math:`\mathcal{P}` is the instantaneous pressure, :math:`d` the dimension
-of the system (number of flags set by ``direction``), :math:`f_{ij}` the
+is the instantaneous pressure, with :math:`d` the dimension
+of the system (number of flags set by ``direction``), :math:`\vec{f}_{ij}` the
short range interaction force between particles :math:`i` and :math:`j` and
-:math:`x_{ij}= x_j - x_i`.
+:math:`\vec{x}_{ij}= \vec{x}_j - \vec{x}_i`.
In addition to this deterministic force, a friction :math:`-\frac{\gamma^V}{Q}\Pi(t)`
and noise :math:`\sqrt{k_B T \gamma^V} \eta(t)` are added for the box
@@ -160,12 +158,12 @@ The discretisation consists of the following steps (see :cite:`kolb99a` for a fu
1. Calculate the particle velocities at the half step
- .. math:: v'(t+dt/2) = v(t) + \frac{F(x(t),v(t-dt/2),t)}{m} dt/2
+ .. math:: \vec{v}'(t+dt/2) = \vec{v}(t) + \frac{\vec{F}(\vec{x}(t),\vec{v}(t-dt/2),t)}{m} dt/2
2. Calculate the instantaneous pressure and "volume momentum"
- .. math:: \mathcal{P} = \mathcal{P}(x(t),V(t),f(x(t)), v'(t+dt/2))
- .. math:: \Pi(t+dt/2) = \Pi(t) + (\mathcal{P}-P) dt/2 -\frac{\gamma^V}{Q}\Pi(t) dt/2 + \sqrt{k_B T \gamma^V dt} \overline{\eta}
+ .. math:: \mathcal{P} = \mathcal{P}(\vec{x}(t),V(t),\vec{f}(\vec{x}(t)), \vec{v}'(t+dt/2))
+ .. math:: \Pi(t+dt/2) = \Pi(t) + (\mathcal{P}-P) dt/2 -\frac{\gamma^V}{Q}\Pi(t) dt/2 + \sqrt{k_B T \gamma^V dt} {\eta_*}
3. Calculate box volume and scaling parameter :math:`L` at half step and full step, scale the simulation box accordingly
@@ -176,27 +174,27 @@ The discretisation consists of the following steps (see :cite:`kolb99a` for a fu
4. Update particle positions and scale velocities
- .. math:: x(t+dt) = \frac{L(t+dt)}{L(t)} \left[ x(t) + \frac{L^2(t)}{L^2(t+dt/2)} v(t+dt/2) dt \right]
- .. math:: v(t+dt/2) = \frac{L(t)}{L(t+dt)} v'(t+dt/2)
+ .. math:: \vec{x}(t+dt) = \frac{L(t+dt)}{L(t)} \left[ \vec{x}(t) + \frac{L^2(t)}{L^2(t+dt/2)} \vec{v}(t+dt/2) dt \right]
+ .. math:: \vec{v}(t+dt/2) = \frac{L(t)}{L(t+dt)} \vec{v}'(t+dt/2)
5. Calculate forces, instantaneous pressure and "volume momentum"
- .. math:: F = F(x(t+dt),v(t+dt/2),t)
- .. math:: \mathcal{P} = \mathcal{P}(x(t+dt),V(t+dt),f(x(t+dt)), v(t+dt/2))
- .. math:: \Pi(t+dt) = \Pi(t+dt/2) + (\mathcal{P}-P) dt/2 -\frac{\gamma^V}{Q}\Pi(t+dt/2) dt/2 + \sqrt{k_B T \gamma^V dt} \overline{\eta}
+ .. math:: \vec{F} = \vec{F}(\vec{x}(t+dt),\vec{v}(t+dt/2),t)
+ .. math:: \mathcal{P} = \mathcal{P}(\vec{x}(t+dt),V(t+dt),\vec{f}(\vec{x}(t+dt)), \vec{v}(t+dt/2))
+ .. math:: \Pi(t+dt) = \Pi(t+dt/2) + (\mathcal{P}-P) dt/2 -\frac{\gamma^V}{Q}\Pi(t+dt/2) dt/2 + \sqrt{k_B T \gamma^V dt} {\eta_*}
- with uncorrelated numbers :math:`\overline{\eta}` drawn from a random uniform process :math:`\eta(t)`
+ with uncorrelated numbers :math:`{\eta_*}` drawn from a random uniform process.
6. Update the velocities
- .. math:: v(t+dt) = v(t+dt/2) + \frac{F(t+dt)}{m} dt/2
+ .. math:: \vec{v}(t+dt) = \vec{v}(t+dt/2) + \frac{\vec{F}(t+dt)}{m} dt/2
Notes:
-* The NpT algorithm is only tested for all 3 directions enabled for scaling. Usage of ``direction`` is considered an experimental feature.
+* The NpT algorithm is only tested for ``direction = 3 * [True]``. Usage of any other ``direction`` value is considered an experimental feature.
* In step 4, only those coordinates are scaled for which ``direction`` is set.
* For the instantaneous pressure, the same limitations of applicability hold as described in :ref:`Pressure`.
-* The particle forces :math:`F` include interactions as well as a friction (:math:`\gamma^0`) and noise term (:math:`\sqrt{k_B T \gamma^0 dt} \overline{\eta}`) analogous to the terms in the :ref:`Langevin thermostat`.
+* The particle forces :math:`\vec{F}` include interactions as well as a friction (:math:`\gamma^0`) and noise term (:math:`\sqrt{k_B T \gamma^0 dt} {\eta_*}`) analogous to the terms in the :ref:`Langevin thermostat`.
* The particle forces are only calculated in step 5 and then reused in step 1 of the next iteration. See :ref:`Velocity Verlet Algorithm` for the implications of that.
* The NpT algorithm doesn't support :ref:`Lees-Edwards boundary conditions`.
* The NpT algorithm doesn't support propagation of angular velocities.
@@ -205,36 +203,40 @@ Notes:
Steepest descent
^^^^^^^^^^^^^^^^
+To activate steepest descent, use :meth:`espressomd.integrate.IntegratorHandle.set_steepest_descent`.
+A code snippet could look like::
-:meth:`espressomd.integrate.IntegratorHandle.set_steepest_descent`
+ max_steps = 20 # maximal number of steps
+ system.integrator.set_steepest_descent(
+ f_max=0, gamma=0.1, max_displacement=0.1)
+ system.integrator.run(max_steps)
+ system.integrator.set_vv() # to switch back to velocity Verlet
+
+The 'equation of motion' in discretised form reads
+
+.. math:: \vec{x}(t + \Delta t) = \vec{x}(t) + \min\left(|\gamma\vec{F}(t)\Delta t|, r_{\text{max}}\right) \cdot \vec{F}(t)/|\vec{F}(t)|
+
+with :math:`r_{\text{max}}` the maximal displacement, :math:`\gamma`
+the friction coefficient, :math:`\vec{x}` the particle position,
+:math:`\vec{F}` the force on the particle, and :math:`\Delta t` the time step.
This feature is used to propagate each particle by a small distance parallel to the force acting on it.
When only conservative forces for which a potential exists are in use, this is equivalent to a steepest descent energy minimization.
A common application is removing overlap between randomly placed particles.
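+
+For illustration only (not the |es| API), the capped displacement update above
+can be written as the following sketch, where ``x`` and ``f`` are ``(N, 3)``
+arrays of positions and forces::
+
+    import numpy as np
+
+    def steepest_descent_step(x, f, gamma, dt, r_max):
+        # assumes non-vanishing forces
+        f_norm = np.linalg.norm(f, axis=1, keepdims=True)
+        step = np.minimum(gamma * f_norm * dt, r_max)  # cap the displacement
+        return x + step * f / f_norm
+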
-
Please note that the behavior is undefined if a thermostat is activated,
-in which case the integrator will generate an error. The integrator runs
-the following steepest descent algorithm:
+in which case the integrator will generate an error.
-.. math:: \vec{r}_{i+1} = \vec{r}_i + \min(\gamma \vec{F}_i, \vec{r}_{\text{max_displacement}}),
-
-while the maximal force/torque is bigger than ``f_max`` or for at most ``steps`` times. The energy
+Steepest descent is applied
+while the maximal force/torque is larger than ``f_max``, or for at most ``max_steps`` iterations. The energy
is relaxed by ``gamma``, while the change per coordinate per step is limited to ``max_displacement``.
The combination of ``gamma`` and ``max_displacement`` can be used to get a poor man's adaptive update.
Rotational degrees of freedom are treated similarly: each particle is
rotated around an axis parallel to the torque acting on the particle,
-with ``max_displacement`` interpreted as the maximal rotation angle.
+with ``max_displacement`` interpreted as the maximal rotation angle in radians.
Please be aware that this need not converge to a local
minimum in periodic boundary conditions. Translational and rotational
coordinates that are fixed using the ``fix`` and ``rotation`` attribute of particles are not altered.
-Usage example::
-
- system.integrator.set_steepest_descent(
- f_max=0, gamma=0.1, max_displacement=0.1)
- system.integrator.run(20) # maximal number of steps
- system.integrator.set_vv() # to switch back to velocity Verlet
-
.. _Using a custom convergence criterion:
Using a custom convergence criterion
@@ -244,23 +246,23 @@ The ``f_max`` parameter can be set to zero to prevent the integrator from
halting when a specific force/torque is reached. The integration can then
be carried out in a loop with a custom convergence criterion::
- min_sigma = 1 # size of the smallest particle
- max_sigma = 5 # size of the largest particle
- min_dist = 0.0
+    min_dist_target = 1  # target minimum distance between any two particles
+
system.integrator.set_steepest_descent(f_max=0, gamma=10,
- max_displacement=min_sigma * 0.01)
- # gradient descent until particles are separated by at least max_sigma
- while min_dist < max_sigma:
+ max_displacement=0.01)
+ # gradient descent until particles are separated by at least min_dist_target
+ min_dist = 0.0
+ while min_dist < min_dist_target:
min_dist = system.analysis.min_dist()
system.integrator.run(10)
system.integrator.set_vv()
When writing a custom convergence criterion based on forces or torques, keep
in mind that particles whose motion and rotation are fixed in space along
-some or all axes with ``fix`` or ``rotation`` need to be filtered from the
-force/torque observable used in the custom convergence criterion. Since these
-two properties can be cast to boolean values, they can be used as masks to
-remove forces/torques that are ignored by the integrator::
+some or all axes with ``fix`` or ``rotation`` still experience forces and torques.
+Therefore, they need to be filtered from the
+force/torque observable used in the custom convergence criterion. A code snippet
+that achieves this filtering could look like::
particles = system.part.all()
max_force = np.max(np.linalg.norm(particles.f * np.logical_not(particles.fix), axis=1))
@@ -313,8 +315,44 @@ mesh surface deformation.
Brownian Dynamics
^^^^^^^^^^^^^^^^^
-Brownian Dynamics integrator :cite:`schlick10a`.
-See details in :ref:`Brownian thermostat`.
+To activate Brownian dynamics, use :meth:`espressomd.integrate.IntegratorHandle.set_brownian_dynamics`.
+A code snippet would look like::
+
+ import espressomd
+ system = espressomd.System(box_l=[1, 1, 1])
+ system.thermostat.set_brownian(kT=1.0, gamma=1.0, seed=41)
+ system.integrator.set_brownian_dynamics()
+
+In addition to the integrator, the corresponding thermostat has to be set.
+The thermostat holds the parameters used in the Brownian equation of motion.
+
+The particle trajectories are governed by
+
+.. math:: \dot{\vec{x}}_i(t) = \gamma^{-1} \vec{F}_i(\{\vec{x}_j\}, \{\vec{v}_j\}, t) + \sqrt{2 k_B T \gamma^{-1}} \vec{\eta}_i(t),
+
+where :math:`\vec{F}_i` are all deterministic forces from interactions and :math:`\vec{\eta}_i`
+are random forces with zero mean and unit variance.
+This equation of motion follows from Langevin's equation of motion (see :ref:`Langevin thermostat`)
+by setting the mass of the particle to zero.
+
+|es|'s discretisation is based on :cite:`schlick10a`, :cite:`ermak78a`
+and reads
+
+.. math:: \vec{x}(t+dt) = \vec{x}(t) + \gamma^{-1} \vec{F}(\vec{x}(t), \vec{v}(t), t) dt + \sqrt{2 k_B T \gamma^{-1} dt} \vec{\eta}_*(t)
+
+where :math:`\vec{\eta}_*` are pseudo-random numbers with zero mean and unit variance (particle indices are omitted for clarity).
+Velocities are obtained directly from
+
+.. math:: \vec{v}(t) = \gamma^{-1} \vec{F} + \sqrt{2 k_B T \gamma^{-1} dt^{-1}} \vec{\eta}_{*}(t)
+
+Be aware that the velocity contains random terms and is therefore not continuous in time.
+
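+For illustration only (not the |es| API), a single position update following
+this discretisation could read::
+
+    import numpy as np
+
+    rng = np.random.default_rng(41)
+
+    def brownian_step(x, f, gamma, kT, dt):
+        noise = rng.standard_normal(x.shape)  # zero mean, unit variance
+        return x + f / gamma * dt + np.sqrt(2. * kT * dt / gamma) * noise
+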
+Rotational motion is implemented analogously.
+Note: the rotational Brownian dynamics implementation is only compatible with particles that have
+an isotropic moment of inertia tensor.
+Otherwise, the viscous terminal angular velocity
+is not well-defined, i.e., it has no constant direction.
+
.. _Stokesian Dynamics:
@@ -368,10 +406,7 @@ Note that this setup represents a system at zero temperature. In order to
thermalize the system, the SD thermostat needs to be activated (see
:ref:`Stokesian thermostat`).
-.. _Important_SD:
-
-Important
-"""""""""
+**Note:**
The particles must be prevented from overlapping. It is mathematically allowed
for the particles to overlap to a certain degree. However, once the distance
@@ -394,7 +429,7 @@ sphere diameters.
Thermostats
-----------
-To add a thermostat, call the appropriate setter::
+To add a thermostat, call the appropriate setter, e.g., ::
system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=41)
@@ -403,18 +438,17 @@ subsections.
You may combine different thermostats by turning them on sequentially.
Not all combinations of thermostats are sensible, though, and some
-integrators only work with a specific thermostat. The list of possible
-combinations of integrators and thermostats is hardcoded and automatically
-check against at the start of integration.
+thermostats only work with specific integrators.
+The list of possible combinations of integrators and thermostats is hardcoded and automatically
+checked at the start of integration.
Note that there is only one temperature for all thermostats.
The list of active thermostats can be cleared at any time with
:py:meth:`system.thermostat.turn_off() <espressomd.thermostat.Thermostat.turn_off>`.
Since |es| does not enforce a particular unit system, it cannot know about
-the current value of the Boltzmann constant. Therefore, when specifying
-the temperature of a thermostat, you actually do not define the
-temperature, but the value of the thermal energy :math:`k_B T` in the
-current unit system (see the discussion on units, Section :ref:`On units`).
+the current value of the Boltzmann constant. Therefore, instead of specifying
+the temperature, you have to provide a value for the thermal energy :math:`k_B T` in the
+current unit system (see the discussion on units in section :ref:`On units`).
All thermostats have a ``seed`` argument that controls the state of the random
number generator (Philox Counter-based RNG). This seed is required on first
@@ -437,46 +471,51 @@ Best explained in an example::
system = espressomd.System(box_l=[1, 1, 1])
system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=41)
-As explained before the temperature is set as thermal energy :math:`k_\mathrm{B} T`.
+The temperature is set as thermal energy :math:`k_\mathrm{B} T`.
The Langevin thermostat is based on an extension of Newton's equation of motion to
account for drag and collisions with a fluid:
-.. math:: m_i \dot{v}_i(t) = f_i(\{x_j\},v_i,t) - \gamma v_i(t) + \sqrt{2\gamma k_B T} \eta_i(t).
+.. math:: m_i \dot{\vec{v}}_i(t) = \vec{f}_i(\{\vec{x}_j\}, \, \vec{v}_i,t) - \gamma \vec{v}_i(t) + \sqrt{2\gamma k_B T} \vec{\eta}_i(t).
-Here, :math:`f_i` are all deterministic forces from interactions,
-:math:`\gamma` the bare friction coefficient and :math:`\eta` a random, "thermal" force.
+Here, :math:`\vec{f}_i` are all deterministic forces from interactions,
+:math:`\gamma` the friction coefficient and :math:`\vec{\eta}` a random, "thermal" force.
The friction term accounts for dissipation in a surrounding fluid whereas
the random force mimics collisions of the particle with solvent molecules
at temperature :math:`T` and satisfies
-.. math:: <\eta(t)> = 0 , <\eta^\alpha_i(t)\eta^\beta_j(t')> = \delta_{\alpha\beta} \delta_{ij}\delta(t-t')
+.. math:: <\vec{\eta}(t)> = \vec{0} , <\eta^\alpha_i(t)\eta^\beta_j(t')> = \delta_{\alpha\beta} \delta_{ij}\delta(t-t')
(:math:`<\cdot>` denotes the ensemble average and :math:`\alpha,\beta` are spatial coordinates).
In the |es| implementation of the Langevin thermostat,
the additional terms only enter in the force calculation.
-This reduces the accuracy of the velocity Verlet integrator
-by one order in :math:`dt` because forces are now velocity-dependent.
+The general form of the equation of motion is still the same as
+for Newton's equations, therefore the velocity Verlet integrator is
+used.
+The accuracy of the velocity Verlet integrator is reduced by
+one order in :math:`dt` because forces are now velocity-dependent.
+
+The random process :math:`\vec{\eta}(t)` is discretized by drawing uncorrelated random numbers
+:math:`\vec{\eta}_*` for each particle.
+The distribution of :math:`\vec{\eta}_*` is uniform and satisfies
-The random process :math:`\eta(t)` is discretized by drawing an uncorrelated random number
-:math:`\overline{\eta}` for each component of all the particle forces.
-The distribution of :math:`\overline{\eta}` is uniform and satisfies
+.. math:: <\vec{\eta}_*> = \vec{0} , <\eta_*^\alpha \eta_*^\beta> = \frac{\delta_{\alpha\beta}}{dt},
-.. math:: <\overline{\eta}> = 0 , <\overline{\eta}\overline{\eta}> = 1/dt
+approximating the delta-correlation of the continuous equation.
If the feature ``ROTATION`` is compiled in, the rotational degrees of freedom are
also coupled to the thermostat. If only the first two arguments are
specified then the friction coefficient for the rotation is set to the
same value as that for the translation.
A separate rotational friction coefficient can be set by inputting
-``gamma_rotate``. The two options allow one to switch the translational and rotational
+``gamma_rotation``. The two options allow one to switch the translational and rotational
thermalization on or off separately, maintaining the frictional behavior. This
can be useful, for instance, in high Péclet number active matter systems, where
one wants to thermalize only the rotational degrees of freedom while
translational degrees of freedom are affected by the self-propulsion.
-The keywords ``gamma`` and ``gamma_rotate`` can be specified as a scalar,
+The keywords ``gamma`` and ``gamma_rotation`` can be specified as a scalar,
or, with feature ``PARTICLE_ANISOTROPY`` compiled in, as the three eigenvalues
of the respective friction coefficient tensor. This enables the simulation of
the anisotropic diffusion of anisotropic colloids (rods, etc.).
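+
+For instance, with ``ROTATION`` and ``PARTICLE_ANISOTROPY`` compiled in,
+anisotropic friction tensors might be set up as follows (values are illustrative)::
+
+    system.thermostat.set_langevin(kT=1.0, gamma=[1.0, 1.0, 2.0],
+                                   gamma_rotation=[1.0, 1.0, 2.0], seed=41)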
@@ -491,74 +530,11 @@ friction coefficient for every particle individually via the feature
Brownian thermostat
^^^^^^^^^^^^^^^^^^^
-Brownian thermostat is a formal name of a thermostat enabling the
-Brownian Dynamics feature (see :cite:`schlick10a`) which implies
-a propagation scheme involving systematic and thermal parts of the
-classical Ermak-McCammom's (see :cite:`ermak78a`)
-Brownian Dynamics. Currently it is implemented without
-hydrodynamic interactions, i.e.
-with a diagonal diffusion tensor.
-The hydrodynamic interactions feature will be available later
-as a part of the present Brownian Dynamics or
-implemented separately within the Stokesian Dynamics.
-
In order to activate the Brownian thermostat, the member function
:py:attr:`~espressomd.thermostat.Thermostat.set_brownian` of the thermostat
class :class:`espressomd.thermostat.Thermostat` has to be invoked.
-The system integrator should be also changed.
-Best explained in an example::
-
- import espressomd
- system = espressomd.System(box_l=[1, 1, 1])
- system.thermostat.set_brownian(kT=1.0, gamma=1.0, seed=41)
- system.integrator.set_brownian_dynamics()
-
-where ``gamma`` (hereinafter :math:`\gamma`) is a viscous friction coefficient.
-In terms of the Python interface and setup, the Brownian thermostat is very
-similar to the :ref:`Langevin thermostat`. The feature
-``THERMOSTAT_PER_PARTICLE`` is used to control the per-particle
-temperature and the friction coefficient setup. The major differences are
-its internal integrator implementation and other temporal constraints.
-The integrator is still a symplectic velocity Verlet-like one.
-It is implemented via a viscous drag part and a random walk of both the position and
-velocity. Due to a nature of the Brownian Dynamics method, its time step :math:`\Delta t`
-should be large enough compared to the relaxation time
-:math:`m/\gamma` where :math:`m` is the particle mass.
-This requirement is just a conceptual one
-without specific implementation technical restrictions.
-Note that with all similarities of
-Langevin and Brownian Dynamics, the Langevin thermostat temporal constraint
-is opposite. A velocity is restarting from zero at every step.
-Formally, the previous step velocity at the beginning of the the :math:`\Delta t` interval
-is dissipated further
-and does not contribute to the end one as well as to the positional random walk.
-Another temporal constraint
-which is valid for both Langevin and Brownian Dynamics: conservative forces
-should not change significantly over the :math:`\Delta t` interval.
-
-The viscous terminal velocity :math:`\Delta v` and corresponding positional
-step :math:`\Delta r` are fully driven by conservative forces :math:`F`:
-
-.. math:: \Delta r = \frac{F \cdot \Delta t}{\gamma}
-
-.. math:: \Delta v = \frac{F}{\gamma}
-
-A positional random walk variance of each coordinate :math:`\sigma_p^2`
-corresponds to a diffusion within the Wiener process:
-
-.. math:: \sigma_p^2 = 2 \frac{kT}{\gamma} \cdot \Delta t
-
-Each velocity component random walk variance :math:`\sigma_v^2` is defined by the heat
-component:
-
-.. math:: \sigma_v^2 = \frac{kT}{m}
-
-Note: the velocity random walk is propagated from zero at each step.
-
-A rotational motion is implemented similarly.
-Note: the rotational Brownian dynamics implementation is compatible with particles which have
-the isotropic moment of inertia tensor only. Otherwise, the viscous terminal angular velocity
-is not defined, i.e. it has no constant direction over the time.
+The system integrator must also be changed.
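+
+A minimal sketch (assuming a ``system`` instance exists)::
+
+    system.thermostat.set_brownian(kT=1.0, gamma=1.0, seed=41)
+    system.integrator.set_brownian_dynamics()
+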
+For details, see :ref:`Brownian Dynamics`.
.. _Isotropic NpT thermostat:
@@ -569,16 +545,7 @@ This feature allows to simulate an (on average) homogeneous and isotropic system
In order to use this feature, ``NPT`` has to be defined in the :file:`myconfig.hpp`.
Activate the NpT thermostat with the command :py:meth:`~espressomd.thermostat.Thermostat.set_npt`
and setup the integrator for the NpT ensemble with :py:meth:`~espressomd.integrate.IntegratorHandle.set_isotropic_npt`.
-
-For example::
-
- import espressomd
-
- system = espressomd.System(box_l=[1, 1, 1])
- system.thermostat.set_npt(kT=1.0, gamma0=1.0, gammav=1.0, seed=41)
- system.integrator.set_isotropic_npt(ext_pressure=1.0, piston=1.0)
-
-For an explanation of the algorithm involved, see :ref:`Isotropic NpT integrator`.
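+
+A minimal sketch (assuming a ``system`` instance exists)::
+
+    system.thermostat.set_npt(kT=1.0, gamma0=1.0, gammav=1.0, seed=41)
+    system.integrator.set_isotropic_npt(ext_pressure=1.0, piston=1.0)
+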
+For details, see :ref:`Isotropic NpT integrator`.
Be aware that this feature is neither properly tested for all systems
nor regularly maintained. If you use it and notice strange
@@ -595,7 +562,7 @@ are not applied to every particle individually but instead
encoded in a dissipative interaction between particles :cite:`soddemann03a`.
To realize a complete DPD fluid model in |es|, three parts are needed:
-the DPD thermostat, which controls the temperate, a dissipative interaction
+the DPD thermostat, which controls the temperature, a dissipative interaction
between the particles that make up the fluid, see :ref:`DPD interaction`,
and a repulsive conservative force, see :ref:`Hat interaction`.
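+
+A minimal sketch of these three ingredients might look as follows
+(parameter values are illustrative)::
+
+    import espressomd
+
+    system = espressomd.System(box_l=[8., 8., 8.])
+    system.time_step = 0.01
+    system.cell_system.skin = 0.4
+    # thermostat controlling the temperature
+    system.thermostat.set_dpd(kT=1.0, seed=42)
+    # dissipative friction between particle pairs of type 0
+    system.non_bonded_inter[0, 0].dpd.set_params(
+        weight_function=0, gamma=1.0, r_cut=1.0,
+        trans_weight_function=0, trans_gamma=1.0, trans_r_cut=1.0)
+    # soft repulsive conservative force
+    system.non_bonded_inter[0, 0].hat.set_params(F_max=1.0, cutoff=1.0)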
@@ -608,9 +575,7 @@ The friction coefficients and cutoff are controlled via the
The friction (dissipative) and noise (random) term are coupled via the
fluctuation-dissipation theorem. The friction term is a function of the
-relative velocity of particle pairs. The DPD thermostat is better for
-dynamics than the Langevin thermostat, since it mimics hydrodynamics in
-the system.
+relative velocity of particle pairs. In addition to the physics covered by the Langevin thermostat, the DPD thermostat mimics hydrodynamics in the system.
As a conservative force any interaction potential can be used,
see :ref:`Isotropic non-bonded interactions`. A common choice is
@@ -638,17 +603,12 @@ Lattice-Boltzmann thermostat
The :ref:`Lattice-Boltzmann` thermostat acts similar to the :ref:`Langevin thermostat` in that the governing equation for particles is
-.. math:: m_i \dot{v}_i(t) = f_i(\{x_j\},v_i,t) - \gamma (v_i(t)-u(x_i(t),t)) + \sqrt{2\gamma k_B T} \eta_i(t).
-
-where :math:`u(x,t)` is the fluid velocity at position :math:`x` and time :math:`t`.
-To preserve momentum, an equal and opposite friction force and random force act on the fluid.
+.. math:: m_i \dot{\vec{v}}_i(t) = \vec{f}_i(\{\vec{x}_j\},\vec{v}_i,t) - \gamma (\vec{v}_i(t)-\vec{u}(\vec{x}_i(t),t)) + \sqrt{2\gamma k_B T} \vec{\eta}_i(t).
-Numerically the fluid velocity is determined from the lattice-Boltzmann node velocities
-by interpolating as described in :ref:`Interpolating velocities`.
-The backcoupling of friction forces and noise to the fluid is also done by distributing those forces amongst the nearest LB nodes.
-Details for both the interpolation and the force distribution can be found in :cite:`ahlrichs99a` and :cite:`dunweg09a`.
+where :math:`\vec{u}(\vec{x},t)` is the fluid velocity at position :math:`\vec{x}` and time :math:`t`.
+Unlike the Langevin thermostat, the friction here is calculated with respect to a moving fluid.
-The LB fluid can be used to thermalize particles, while also including their hydrodynamic interactions.
+An LB fluid must be used to provide the fluid velocity, while also including hydrodynamic interactions between particles.
The LB thermostat expects an instance of either :class:`espressomd.lb.LBFluidWalberla` or :class:`espressomd.lb.LBFluidWalberlaGPU`.
Temperature is set via the ``kT`` argument of the LB fluid.
@@ -657,21 +617,33 @@ parameter ``gamma``. To enable the LB thermostat, use::
import espressomd
import espressomd.lb
- system = espressomd.System(box_l=[1, 1, 1])
- lbf = espressomd.lb.LBFluidWalberla(agrid=1, density=1, kinematic_viscosity=1, tau=0.01)
- self.system.lb = lbf
+ system = espressomd.System(box_l=[8., 8., 8.])
+ system.time_step = 0.01
+ system.cell_system.skin = 0.4
+ lbf = espressomd.lb.LBFluidWalberla(agrid=1., tau=0.01, density=1.,
+ kinematic_viscosity=1.)
+ system.lb = lbf
system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5)
+ system.part.add(pos=[0., 0., 0.], ext_force=[0., 0., 1.])
+ system.integrator.run(10)
-No other thermostatting mechanism is necessary
-then. Please switch off any other thermostat before starting the LB
-thermostatting mechanism.
+Numerically the fluid velocity is determined from the lattice-Boltzmann node velocities
+by interpolating as described in :ref:`Interpolating velocities`.
+To preserve momentum, friction and random forces are also applied to the fluid, with equal magnitude and opposite sign.
+This backcoupling of forces on the fluid is done by distributing the forces amongst the nearest LB nodes.
+Details for both the interpolation and the force distribution can be found in :cite:`ahlrichs99a` and :cite:`dunweg09a`.
The LBM implementation provides a fully thermalized LB fluid: all
nonconserved modes, including the pressure tensor, fluctuate correctly
according to the given temperature and the relaxation parameters. All
-fluctuations can be switched off by setting the temperature to 0.
+fluctuations can be switched off by setting the temperature to zero.
+The deterministic part of the hydrodynamic interaction is then still active.
+
+If the LB thermostat is active, no other thermostatting mechanism is necessary.
+Please switch off any other thermostat before starting the LB
+thermostatting mechanism.
-.. note:: Coupling between LB and MD only happens if the LB thermostat is set with a :math:`\gamma \ge 0.0`.
+.. note:: Coupling between LB and MD only happens if the LB thermostat is set with a :math:`\gamma > 0.0`.
.. _Stokesian thermostat:
@@ -697,4 +669,4 @@ needs to be activated via::
system.integrator.run(100)
where ``kT`` denotes the desired temperature of the system, and ``seed`` the
-seed for the random number generator.
+seed for the random number generator. For details, see :ref:`Stokesian Dynamics`.
diff --git a/doc/sphinx/introduction.rst b/doc/sphinx/introduction.rst
index 7ec21ba3f93..ecb50e180f1 100644
--- a/doc/sphinx/introduction.rst
+++ b/doc/sphinx/introduction.rst
@@ -485,8 +485,6 @@ report so to the developers using the instructions in :ref:`Contributing`.
+--------------------------------+------------------------+------------------+------------+
| MMM1D | Single | Good | No |
+--------------------------------+------------------------+------------------+------------+
-| MMM1D on GPU | Single | Single | No |
-+--------------------------------+------------------------+------------------+------------+
| ELC | Good | Good | Yes |
+--------------------------------+------------------------+------------------+------------+
| ICC* | Group | Group | Yes |
@@ -644,7 +642,7 @@ You may also provide the patch level, when relevant. If you developed code
for |es| and made it available in a publicly accessible repository, you
should consider providing the corresponding URL, for example in a footnote:
- The method was implemented for ESPResSo 4.2.1[24] and the source code is
+ The method was implemented for ESPResSo 4.2.2[24] and the source code is
available online\ :superscript:`note 1`.
| ____________
diff --git a/doc/sphinx/io.rst b/doc/sphinx/io.rst
index a8f0a8b7435..1e8a0fbb11a 100644
--- a/doc/sphinx/io.rst
+++ b/doc/sphinx/io.rst
@@ -186,8 +186,7 @@ Be aware of the following limitations:
several electrostatic and magnetostatic solvers automatically introduce
a deviation of the order of 1e-7, either due to floating-point rounding
errors (:class:`~espressomd.electrostatics.P3MGPU`), or due to re-tuning
- using the most recent system state (:class:`~espressomd.electrostatics.MMM1D`,
- :class:`~espressomd.electrostatics.MMM1DGPU`).
+ using the most recent system state (:class:`~espressomd.electrostatics.MMM1D`).
When in doubt, you can easily verify the absence of a "force jump" when
loading from a checkpoint by replacing the electrostatics actor with your
combination of features in files :file:`samples/save_checkpoint.py` and
diff --git a/doc/sphinx/lb.rst b/doc/sphinx/lb.rst
index 7ceecf4e1a1..9f1f0189699 100644
--- a/doc/sphinx/lb.rst
+++ b/doc/sphinx/lb.rst
@@ -387,6 +387,16 @@ of the LBM in analogy to the example for the CPU given in section
system.lb = lbf
system.integrator.run(100)
+The waLBerla library supports multi-GPU simulations.
+Without a suitable CUDA-aware MPI library, multi-GPU simulations are slower
+than single-GPU simulations, and would only be relevant for LB systems that
+are too large to fit in the memory of a single GPU device.
+Multi-GPU support in |es| is an experimental feature whose API may change at any time.
+It can be activated by invoking the following expression before the creation
+of the first LB GPU instance::
+
+ system.cuda_init_handle.call_method("set_device_id_per_rank")
+
.. _Electrohydrodynamics:
Electrohydrodynamics
diff --git a/doc/sphinx/magnetostatics.rst b/doc/sphinx/magnetostatics.rst
index c8b990c8798..e2b3e923988 100644
--- a/doc/sphinx/magnetostatics.rst
+++ b/doc/sphinx/magnetostatics.rst
@@ -168,31 +168,6 @@ via an observable.
Both the CPU and GPU implementations support MPI-parallelization.
-.. _Barnes-Hut octree sum on GPU:
-
-Barnes-Hut octree sum on GPU
-----------------------------
-
-:class:`espressomd.magnetostatics.DipolarBarnesHutGpu`
-
-This interaction calculates energies and forces between dipoles by
-summing over the spatial octree cells (aka ``leaves``).
-Far enough cells are considered as a single dipole with a cumulative
-vector in the cell center of mass. Parameters which determine that the
-cell is far enough are :math:`I_{\mathrm{tol}}^2` and
-:math:`\varepsilon^2` which define a fraction of the cell and
-an additive distance respectively. For the detailed description of the
-Barnes-Hut method application to the dipole-dipole interactions, please
-refer to :cite:`polyakov13a`.
-
-To use the method, create an instance of :class:`~espressomd.magnetostatics.DipolarBarnesHutGpu`
-and attach it to the system::
-
- import espressomd.magnetostatics
- bh = espressomd.magnetostatics.DipolarBarnesHutGpu(prefactor=1., epssq=200.0, itolsq=8.0)
- system.magnetostatics.solver = bh
-
-
.. _ScaFaCoS magnetostatics:
ScaFaCoS magnetostatics
diff --git a/doc/sphinx/particles.rst b/doc/sphinx/particles.rst
index 0106aa354c6..a6fafbe3f01 100644
--- a/doc/sphinx/particles.rst
+++ b/doc/sphinx/particles.rst
@@ -289,7 +289,6 @@ and :attr:`~espressomd.propagation.Propagation.ROT_VS_RELATIVE`.
particles you create::
import espressomd
-
system = espressomd.System(box_l=[10., 10., 10.])
p1 = system.part.add(pos=[1., 2., 3.])
@@ -301,7 +300,9 @@ and :attr:`~espressomd.propagation.Propagation.ROT_VS_RELATIVE`.
p2.vs_auto_relate_to(p1)
The :meth:`~espressomd.particle_data.ParticleHandle.is_virtual`
- method on particle ``p2`` will now return ``True``.
+ method of particle ``p2`` will now return ``True``, and its
+ :attr:`~espressomd.particle_data.ParticleHandle.propagation`
+ attribute will return the correct combination of flags.
#. Repeat the previous step with more virtual sites, if desired.
@@ -317,7 +318,8 @@ Please note:
virtual site in the non-virtual particles body-fixed frame. This
information is saved in the virtual site's
:attr:`~espressomd.particle_data.ParticleHandle.vs_relative` attribute.
- Take care, not to overwrite it after using ``vs_auto_relate``.
+  Take care not to overwrite it after using
+ :meth:`~espressomd.particle_data.ParticleHandle.vs_auto_relate_to`.
- Virtual sites can not be placed relative to other virtual sites, as
the order in which the positions of virtual sites are updated is not
@@ -326,7 +328,7 @@ Please note:
- In case you know the correct quaternions, you can also setup a virtual
site using its :attr:`~espressomd.particle_data.ParticleHandle.vs_relative`
- and :attr:`~espressomd.particle_data.ParticleHandle.virtual` attributes.
+ and :attr:`~espressomd.particle_data.ParticleHandle.propagation` attributes.
- In a simulation on more than one CPU, the effective cell size needs
to be larger than the largest distance between a non-virtual particle
@@ -346,6 +348,14 @@ Please note:
- The presence of rigid bodies constructed by means of virtual sites
adds a contribution to the scalar pressure and pressure tensor.
+- The :meth:`~espressomd.particle_data.ParticleHandle.vs_auto_relate_to`
+  method has additional keyword arguments for controlling whether the virtual site
+  should be coupled to a lattice-Boltzmann fluid (``couple_to_lb=True``) or
+  to the Langevin thermostat (``couple_to_langevin=True``), or both
+  (in that case LB is used for translation and Langevin for rotation);
+  this is achieved internally by adding extra propagation flags, as sketched below.
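+
+  For instance, assuming an LB fluid and a Langevin thermostat are both
+  active, a virtual site might be coupled to LB for translation and to
+  Langevin for rotation (a minimal sketch)::
+
+      p2.vs_auto_relate_to(p1, couple_to_lb=True, couple_to_langevin=True)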
+
+
.. _Inertialess lattice-Boltzmann tracers:
Inertialess lattice-Boltzmann tracers
@@ -354,13 +364,92 @@ Inertialess lattice-Boltzmann tracers
Using the propagation mode :attr:`~espressomd.propagation.Propagation.TRANS_LB_TRACER`,
the virtual sites follow the motion of a LB fluid. This is achieved by integrating
their position using the fluid velocity at the virtual sites' position.
-Forces acting on the virtual sites are directly transferred as force density
+Forces acting on the virtual sites are directly transferred as a force density
onto the lattice-Boltzmann fluid, making the coupling free of inertia.
Please note that the velocity attribute of the virtual particles
does not carry valid information for this virtual sites scheme.
The feature stems from the implementation of the
:ref:`Immersed Boundary Method for soft elastic objects`, but can be used independently.
+In the following example, a particle is advected by a fluid flowing along the x-axis::
+
+ import espressomd
+ import espressomd.lb
+ import espressomd.propagation
+ Propagation = espressomd.propagation.Propagation
+ system = espressomd.System(box_l=[8., 8., 8.])
+ system.time_step = 0.01
+ system.cell_system.skin = 0.
+ lbf = espressomd.lb.LBFluidWalberla(agrid=1., tau=0.01, density=1.,
+ kinematic_viscosity=1.)
+ system.lb = lbf
+ system.thermostat.set_lb(LB_fluid=lbf, seed=123, gamma=1.5)
+ lbf[:, :, :].velocity = [0.1, 0., 0.]
+ p = system.part.add(pos=[0., 0., 0.], propagation=Propagation.TRANS_LB_TRACER)
+ system.integrator.run(10)
+ print(p.pos.round(3))
+
+
+.. _Per-particle propagation:
+
+Per-particle propagation
+------------------------
+
+Particle positions, quaternions, velocities and angular velocities are integrated
+according to the main integrator, which may be coupled to a thermostat and a barostat
+(see :ref:`Particle integration and propagation` for more details).
+The default integrator is the :ref:`Velocity Verlet algorithm`.
+
+The equations of motion can be selected on a per-particle level.
+This is achieved by setting the particle
+:attr:`~espressomd.particle_data.ParticleHandle.propagation` attribute with a
+combination of propagation flags from :class:`~espressomd.propagation.Propagation`.
+
+Depending on which main integrator is selected, different "secondary" integrators
+become available. The velocity Verlet integrator is available as a secondary
+integrator, using flags :class:`~espressomd.propagation.Propagation.TRANS_NEWTON`
+for translation following Newton's equations of motion and
+:class:`~espressomd.propagation.Propagation.ROT_EULER` for rotation
+following Euler's equations of rotation; in this way, selected particles
+can be decoupled from a thermostat.
+:ref:`Virtual sites` also rely on secondary integrators, such as
+:class:`~espressomd.propagation.Propagation.TRANS_VS_RELATIVE` and
+:class:`~espressomd.propagation.Propagation.ROT_VS_RELATIVE` for
+:ref:`Rigid arrangements of particles` or
+:class:`~espressomd.propagation.Propagation.TRANS_LB_TRACER` for
+:ref:`Inertialess lattice-Boltzmann tracers`.
+
+In the following example, particle 1 follows Langevin dynamics (NVT ensemble),
+while particle 2 follows Newtonian dynamics (NVE ensemble)::
+
+ import espressomd
+ import espressomd.propagation
+ Propagation = espressomd.propagation.Propagation
+ system = espressomd.System(box_l=[8., 8., 8.])
+ system.time_step = 0.01
+ system.cell_system.skin = 0.
+ system.thermostat.set_langevin(kT=0.001, gamma=2., seed=42)
+ p1 = system.part.add(pos=[0., 0., 0.], v=[1., 0., 0.],
+ omega_lab=[1., 0., 0.], rotation=[True, True, True])
+ p2 = system.part.add(pos=[0., 0., 0.], v=[1., 0., 0.],
+ omega_lab=[1., 0., 0.], rotation=[True, True, True])
+ p1.propagation = Propagation.TRANS_LANGEVIN | Propagation.ROT_LANGEVIN
+ p2.propagation = Propagation.TRANS_NEWTON | Propagation.ROT_EULER
+ system.integrator.run(1)
+
+Not all combinations of propagation flags are allowed!
+
+The friction coefficient of thermostats can also be controlled on a per-particle level.
+Values stored in the particle attributes :attr:`~espressomd.particle_data.ParticleHandle.gamma`
+and :attr:`~espressomd.particle_data.ParticleHandle.gamma_rot` override
+the friction coefficients of most thermostats.
+This requires the feature ``THERMOSTAT_PER_PARTICLE`` and is used, for example,
+to model :ref:`particle polarizability with thermalized cold Drude oscillators`.
+With the feature ``PARTICLE_ANISOTROPY``, these attributes can also be
+defined as 3D vectors to model particle anisotropy.
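+
+For instance, with ``THERMOSTAT_PER_PARTICLE`` compiled in, the friction
+coefficients of a single particle might be overridden as follows (values
+are illustrative; ``gamma_rot`` additionally requires ``ROTATION``)::
+
+    p = system.part.add(pos=[0., 0., 0.])
+    p.gamma = 0.5      # translational friction for this particle only
+    p.gamma_rot = 2.0  # rotational friction for this particle only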
+
+
.. _Interacting with groups of particles:
Interacting with groups of particles
diff --git a/doc/sphinx/running.rst b/doc/sphinx/running.rst
index d99937f7ff7..164e102cc71 100644
--- a/doc/sphinx/running.rst
+++ b/doc/sphinx/running.rst
@@ -770,16 +770,16 @@ and long-range forces (FFT summation) contribute equally to the runtime:
.. code-block:: none
- $ CALI_CONFIG_PROFILE=runtime-report ./pypresso ../samples/p3m.py --cpu
- Path Inclusive time Exclusive time Time %
- integrate 14.18 0.01 0.08
- Integration loop 13.84 0.43 2.88
- force_calc 13.41 0.20 1.35
- copy_forces_from_GPU 0.01 0.01 0.07
- short_range_loop 6.55 6.55 44.02
- calc_long_range_forces 6.40 6.40 43.00
- init_forces 0.24 0.24 1.58
- copy_particles_to_GPU 0.01 0.01 0.07
+ $ CALI_CONFIG=runtime-report ./pypresso ../samples/p3m.py --cpu
+ Path Min time/rank Max time/rank Avg time/rank Time %
+ integrate 0.13 0.13 0.13 0.52
+ Integration loop 1.49 1.49 1.49 6.03
+ calculate_forces 1.14 1.14 1.14 4.62
+ copy_particles_to_GPU 0.01 0.01 0.01 0.03
+ init_forces 0.14 0.14 0.14 0.56
+ calc_long_range_forces 8.78 8.78 8.78 35.66
+ short_range_loop 10.77 10.77 10.77 43.76
+ copy_forces_from_GPU 0.02 0.02 0.02 0.08
For the GPU implementation of the P3M algorithm, the long-range force
calculation is cheaper, however the transfer of particle data to and from
@@ -787,16 +787,16 @@ the GPU incur additional costs that are not negligible:
.. code-block:: none
- $ CALI_CONFIG_PROFILE=runtime-report ./pypresso ../samples/p3m.py --gpu
- Path Inclusive time Exclusive time Time %
- integrate 14.30 0.03 0.14
- Integration loop 13.87 1.76 7.90
- force_calc 12.12 0.82 3.68
- copy_forces_from_GPU 2.09 2.09 9.42
- short_range_loop 3.20 3.20 14.38
- calc_long_range_forces 3.75 3.75 16.87
- init_forces 1.25 1.25 5.61
- copy_particles_to_GPU 1.01 1.01 4.56
+ $ CALI_CONFIG=runtime-report ./pypresso ../samples/p3m.py --gpu
+ Path Min time/rank Max time/rank Avg time/rank Time %
+ integrate 0.42 0.42 0.42 1.03
+ Integration loop 0.50 0.50 0.50 1.22
+ calculate_forces 0.62 0.62 0.62 1.51
+ copy_particles_to_GPU 0.27 0.27 0.27 0.66
+ init_forces 0.09 0.09 0.09 0.22
+ calc_long_range_forces 0.60 0.60 0.60 1.46
+ short_range_loop 0.85 0.85 0.85 2.06
+ copy_forces_from_GPU 1.06 1.06 1.06 2.58
For a more fine-grained report on GPU kernels:
diff --git a/doc/tutorials/CMakeLists.txt b/doc/tutorials/CMakeLists.txt
index 18798d65b63..2f23dcfad81 100644
--- a/doc/tutorials/CMakeLists.txt
+++ b/doc/tutorials/CMakeLists.txt
@@ -49,13 +49,13 @@ function(NB_EXPORT)
if(NOT "${NB_EXPORT_SUFFIX}" STREQUAL "")
set(NB_EXPORT_TARGET "${NB_EXPORT_TARGET}_${NB_EXPORT_SUFFIX}")
endif()
- get_filename_component(NB_FILE_BASE ${NB_FILE} NAME_WE)
- get_filename_component(NB_FILE_EXT ${NB_FILE} EXT)
- set(HTML_FILE "${NB_FILE_BASE}.html")
- set(PY_FILE "${NB_FILE_BASE}.py")
+ cmake_path(GET NB_FILE STEM NB_FILE_STEM)
+ cmake_path(GET NB_FILE EXTENSION NB_FILE_EXT)
+ set(HTML_FILE "${NB_FILE_STEM}.html")
+ set(PY_FILE "${NB_FILE_STEM}.py")
- if(${NB_EXPORT_HTML_RUN})
- set(NB_FILE_RUN "${NB_FILE_BASE}.run${NB_FILE_EXT}")
+ if(NB_EXPORT_HTML_RUN)
+ set(NB_FILE_RUN "${NB_FILE_STEM}.run${NB_FILE_EXT}")
add_custom_command(
OUTPUT ${NB_FILE_RUN}
DEPENDS
diff --git a/doc/tutorials/charged_system/charged_system.ipynb b/doc/tutorials/charged_system/charged_system.ipynb
index 032d0ec7e73..df07a1e4e88 100644
--- a/doc/tutorials/charged_system/charged_system.ipynb
+++ b/doc/tutorials/charged_system/charged_system.ipynb
@@ -103,6 +103,7 @@
"WCA_EPSILON = 1.0\n",
"ION_DIAMETER = 1.0\n",
"ROD_RADIUS = 1.0\n",
+ "MASS=1.0\n",
"# particle types\n",
"ROD_TYPE = 1\n",
"COUNTERION_TYPE = 2"
@@ -308,45 +309,6 @@
"For this, we use the steepest descent integrator with a relative convergence criterion for forces and energies."
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6ed2ce87",
- "metadata": {},
- "outputs": [],
- "source": [
- "def remove_overlap(system, sd_params):\n",
- " # Removes overlap by steepest descent until forces or energies converge\n",
- " # Set up steepest descent integration\n",
- " system.integrator.set_steepest_descent(f_max=0,\n",
- " gamma=sd_params['damping'],\n",
- " max_displacement=sd_params['max_displacement'])\n",
- "\n",
- " # Initialize integrator to obtain initial forces\n",
- " system.integrator.run(0)\n",
- " maxforce = np.max(np.linalg.norm(system.part.all().f, axis=1))\n",
- " energy = system.analysis.energy()['total']\n",
- "\n",
- " i = 0\n",
- " while i < sd_params['max_steps'] // sd_params['emstep']:\n",
- " prev_maxforce = maxforce\n",
- " prev_energy = energy\n",
- " system.integrator.run(sd_params['emstep'])\n",
- " maxforce = np.max(np.linalg.norm(system.part.all().f, axis=1))\n",
- " relforce = np.abs((maxforce - prev_maxforce) / prev_maxforce)\n",
- " energy = system.analysis.energy()['total']\n",
- " relener = np.abs((energy - prev_energy) / prev_energy)\n",
- " if i > 1 and (i + 1) % 4 == 0:\n",
- " print(f\"minimization step: {(i+1)*sd_params['emstep']:4.0f}\"\n",
- " f\" max. rel. force change:{relforce:+3.3e}\"\n",
- " f\" rel. energy change:{relener:+3.3e}\")\n",
- " if relforce < sd_params['f_tol'] or relener < sd_params['e_tol']:\n",
- " break\n",
- " i += 1\n",
- "\n",
- " system.integrator.set_vv()"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -354,14 +316,17 @@
"metadata": {},
"outputs": [],
"source": [
- "STEEPEST_DESCENT_PARAMS = {'f_tol': 1e-2,\n",
- " 'e_tol': 1e-5,\n",
- " 'damping': 30,\n",
- " 'max_steps': 10000,\n",
- " 'max_displacement': 0.01,\n",
- " 'emstep': 10}\n",
- "\n",
- "remove_overlap(system, STEEPEST_DESCENT_PARAMS)"
+ "def remove_overlap(system):\n",
+ " FMAX = 0.01 * ION_DIAMETER * MASS / system.time_step**2\n",
+ " system.integrator.set_steepest_descent(\n",
+ " f_max=FMAX,\n",
+ " gamma=10,\n",
+ " max_displacement=0.01)\n",
+ " system.integrator.run(5000)\n",
+ " assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\"\n",
+ " system.integrator.set_vv()\n",
+ " \n",
+ "remove_overlap(system)"
]
},
{
@@ -649,7 +614,7 @@
" run['params']['rod_charge_dens'], N_rod_beads, ROD_TYPE)\n",
" p3m = espressomd.electrostatics.P3M(**p3m_params)\n",
" system.electrostatics.solver = p3m\n",
- " remove_overlap(system, STEEPEST_DESCENT_PARAMS)\n",
+ " remove_overlap(system)\n",
" system.thermostat.set_langevin(**LANGEVIN_PARAMS)\n",
" print('', end='', flush=True)\n",
" integrate_system(system, WARMUP_STEPS)\n",
@@ -885,7 +850,7 @@
"\n",
"p3m = espressomd.electrostatics.P3M(**p3m_params)\n",
"system.electrostatics.solver = p3m\n",
- "remove_overlap(system, STEEPEST_DESCENT_PARAMS)\n",
+ "remove_overlap(system)\n",
"system.thermostat.set_langevin(**LANGEVIN_PARAMS)\n",
"print('', end='', flush=True)\n",
"integrate_system(system, WARMUP_STEPS)\n",
diff --git a/doc/tutorials/constant_pH/constant_pH.ipynb b/doc/tutorials/constant_pH/constant_pH.ipynb
index f5f0577aec3..e147a6c3bb7 100644
--- a/doc/tutorials/constant_pH/constant_pH.ipynb
+++ b/doc/tutorials/constant_pH/constant_pH.ipynb
@@ -145,11 +145,8 @@
"plt.rcParams.update({'font.size': 18})\n",
"\n",
"import numpy as np\n",
- "import pkg_resources\n",
"import pint # module for working with units and dimensions\n",
"import time\n",
- "assert pkg_resources.packaging.specifiers.SpecifierSet('>=0.10.1').contains(pint.__version__), \\\n",
- " f'pint version {pint.__version__} is too old: several numpy operations can cast away the unit'\n",
"\n",
"import espressomd\n",
"espressomd.assert_features(['WCA', 'ELECTROSTATICS'])\n",
@@ -548,15 +545,27 @@
"source": [
"if USE_WCA:\n",
" # set the WCA interaction between all particle pairs\n",
+ " wca_sigma = 1.0\n",
+ " wca_epsilon = 1.0\n",
" for type_1 in TYPES.values():\n",
" for type_2 in TYPES.values():\n",
" if type_1 >= type_2:\n",
- " system.non_bonded_inter[type_1, type_2].wca.set_params(epsilon=1.0, sigma=1.0)\n",
+ " system.non_bonded_inter[type_1, type_2].wca.set_params(epsilon=wca_epsilon, sigma=wca_sigma)\n",
"\n",
" # relax the overlaps with steepest descent\n",
- " system.integrator.set_steepest_descent(f_max=0, gamma=0.1, max_displacement=0.1)\n",
- " system.integrator.run(20)\n",
- " system.integrator.set_vv() # to switch back to velocity Verlet\n",
+ " mass = 1.0\n",
+ " FMAX = 0.01 * wca_sigma * mass / system.time_step**2\n",
+ "\n",
+ " system.integrator.set_steepest_descent(\n",
+ " f_max=FMAX,\n",
+ " gamma=0.1,\n",
+ " max_displacement=0.1)\n",
+ "\n",
+ " system.integrator.run(5000)\n",
+ " assert np.all(np.abs(system.part.all().f) 0.05:\n",
- " system.integrator.run(10)\n",
- " energy_new = system.analysis.energy()['total']\n",
- " # Prevent division by zero errors:\n",
- " if energy < sys.float_info.epsilon:\n",
- " break\n",
- " relative_energy_change = (energy - energy_new) / energy\n",
- " print(f'Minimization, relative change in energy: {relative_energy_change:.4f}')\n",
- " energy = energy_new"
+ "system.integrator.set_steepest_descent(\n",
+ " f_max=FMAX,\n",
+ " gamma=0.1,\n",
+ " max_displacement=0.05)\n",
+ "\n",
+ "system.integrator.run(5000)\n",
+ "assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "63fdafa0",
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"id": "7d476671",
diff --git a/doc/tutorials/ferrofluid/ferrofluid_part2.ipynb b/doc/tutorials/ferrofluid/ferrofluid_part2.ipynb
index 4d58268f734..453d5f9e979 100644
--- a/doc/tutorials/ferrofluid/ferrofluid_part2.ipynb
+++ b/doc/tutorials/ferrofluid/ferrofluid_part2.ipynb
@@ -169,11 +169,17 @@
"particles = system.part.add(pos=pos, rotation=N_PART * [(True, True, True)], dip=dip, fix=N_PART * [(False, False, True)])\n",
"\n",
"# Remove overlap between particles by means of the steepest descent method\n",
+ "MASS = 1.0\n",
+ "FMAX = 0.01 * LJ_SIGMA * MASS / system.time_step**2\n",
+ "\n",
"system.integrator.set_steepest_descent(\n",
- " f_max=0, gamma=0.1, max_displacement=0.05)\n",
+ " f_max=FMAX,\n",
+ " gamma=0.1,\n",
+ " max_displacement=0.05)\n",
+ "\n",
+ "system.integrator.run(5000)\n",
+ "assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\"\n",
"\n",
- "while system.analysis.energy()[\"total\"] > 5 * KT * N_PART:\n",
- " system.integrator.run(20)\n",
"\n",
"# Switch to velocity Verlet integrator\n",
"system.integrator.set_vv()\n",
@@ -217,7 +223,7 @@
"outputs": [],
"source": [
"# magnetic field times dipole moment\n",
- "H_dipm = ALPHA * KT\n",
+ "H_dipm = ALPHA * KT / MU_0\n",
"H_field = [H_dipm, 0, 0]"
]
},
diff --git a/doc/tutorials/ferrofluid/ferrofluid_part3.ipynb b/doc/tutorials/ferrofluid/ferrofluid_part3.ipynb
index 5f4843fa35b..b7f896f46a6 100644
--- a/doc/tutorials/ferrofluid/ferrofluid_part3.ipynb
+++ b/doc/tutorials/ferrofluid/ferrofluid_part3.ipynb
@@ -238,11 +238,14 @@
"particles = system.part.add(pos=pos, rotation=N * [(True, True, True)], dip=dip)\n",
"\n",
"# Remove overlap between particles by means of the steepest descent method\n",
+ "mass = 1.0\n",
+ "f_max = 0.01 * lj_sigma * mass / system.time_step**2\n",
"system.integrator.set_steepest_descent(\n",
- " f_max=0, gamma=0.1, max_displacement=0.05)\n",
- "\n",
- "while system.analysis.energy()[\"total\"] > 5 * kT * N:\n",
- " system.integrator.run(20)\n",
+ " f_max=f_max,\n",
+ " gamma=0.1,\n",
+ " max_displacement=0.05)\n",
+ "system.integrator.run(5000)\n",
+ "assert np.all(np.abs(system.part.all().f) < f_max), \"Overlap removal did not converge!\"\n",
"\n",
"# Switch to velocity Verlet integrator\n",
"system.integrator.set_vv()\n",
@@ -557,7 +560,7 @@
" if alpha == 0:\n",
" continue\n",
" # set magnetic field constraint\n",
- " H_dipm = (alpha * kT)\n",
+ " H_dipm = alpha * kT / mu_0\n",
" H_field = [H_dipm, 0, 0]\n",
" H_constraint = espressomd.constraints.HomogeneousMagneticField(H=H_field)\n",
" system.constraints.add(H_constraint)\n",
diff --git a/doc/tutorials/langevin_dynamics/langevin_dynamics.ipynb b/doc/tutorials/langevin_dynamics/langevin_dynamics.ipynb
index 372ca029cee..804725d1799 100644
--- a/doc/tutorials/langevin_dynamics/langevin_dynamics.ipynb
+++ b/doc/tutorials/langevin_dynamics/langevin_dynamics.ipynb
@@ -489,7 +489,7 @@
"# SOLUTION CELL\n",
"plt.figure(figsize=(10, 6))\n",
"plt.xlabel(r'$\\gamma$')\n",
- "plt.ylabel('Diffusion coefficient [$\\sigma^2/t$]')\n",
+ "plt.ylabel(r'Diffusion coefficient [$\\sigma^2/t$]')\n",
"x = np.linspace(0.9 * min(gammas), 1.1 * max(gammas), 50)\n",
"y = KT / x\n",
"plt.plot(x, y, '-', label=r'$k_\\mathrm{B}T\\gamma^{-1}$')\n",
diff --git a/doc/tutorials/lennard_jones/lennard_jones.ipynb b/doc/tutorials/lennard_jones/lennard_jones.ipynb
index f6d846e9a44..80f8b9c2943 100644
--- a/doc/tutorials/lennard_jones/lennard_jones.ipynb
+++ b/doc/tutorials/lennard_jones/lennard_jones.ipynb
@@ -120,8 +120,8 @@
"plt.plot(xs, ys_lj, label='LJ')\n",
"plt.plot(xs, ys_WCA, label='WCA')\n",
"plt.axhline(y=0, color='grey')\n",
- "plt.xlabel(\"$r/\\sigma$\")\n",
- "plt.ylabel(\"$V(r)/(k_{\\mathrm{B}}T)$\")\n",
+ "plt.xlabel(r\"$r/\\sigma$\")\n",
+ "plt.ylabel(r\"$V(r)/(k_{\\mathrm{B}}T)$\")\n",
"plt.legend()\n",
"plt.ylim(-1.5, 2.5)\n",
"plt.show()"
@@ -539,18 +539,14 @@
"id": "b560a981",
"metadata": {},
"source": [
- "**Exercise:**\n",
- "\n",
- "* Use [espressomd.integrate.set_steepest_descent](https://espressomd.github.io/doc/integration.html#steepest-descent) to relax the initial configuration.\n",
- " Use a maximal displacement $\\vec{r}_{\\mathrm{max}}$ of MAX_DISPLACEMENT.\n",
- " The particle displacement is related to the particle force via a damping constant $\\gamma$, such that $\\vec{r}(t + dt) = \\vec{r}(t) + \\min(\\gamma \\vec{F}(t), \\vec{r}_{\\mathrm{max}})$. Use a damping constant gamma = DAMPING.\n",
- "* Use the relative change of the system maximal force as a convergence criterion.\n",
- " Check the documentation [espressomd.particle_data module](https://espressomd.github.io/doc/espressomd.html#module-espressomd.particle_data) to obtain the forces.\n",
- " The steepest descent has converged if the relative force change between two rounds of minimizations is less than the threshold value F_TOL. Note that by default [espressomd.integrate.set_steepest_descent](https://espressomd.github.io/doc/espressomd.html#espressomd.integrate.SteepestDescent) will halt when the system maximal force is less than some value f_max. When a custom convergence criterion is implemented, as it is the case here, the default convergence criterion needs to be disabled by setting f_max=0.\n",
- "* Break the minimization loop after a maximal number of MAX_STEPS steps or if convergence is achieved.\n",
- " Check for convergence every EMSTEP steps.\n",
+ "We will use [espressomd.integrate.set_steepest_descent()](https://espressomd.github.io/doc/integration.html#steepest-descent) to relax the initial configuration.\n",
+ "The particle displacement is related to the particle force via a damping constant $\\gamma$, such that:\n",
+ "$$\\vec{x}_i(t + \\Delta t) = \\vec{x}_i(t) + \\min\\left(|\\gamma\\vec{F}_i(t)\\Delta t|, r_{\\text{max}}\\right) \\cdot \\vec{F}_i(t)/|\\vec{F}_i(t)|$$\n",
"\n",
- "***Hint:*** To obtain the initial forces one has to initialize the integrator using integ_steps=0, i.e. call system.integrator.run(0) before accessing the force array."
+ "with $r_{\\text{max}}$ the maximal displacement, $\\gamma$ the friction coefficient, $\\vec{x}$ the particle position,\n",
+ "$\\vec{F}$ the force on the particle, $\\Delta t$ the time step, and $i$ the vector index.\n",
+ "We will integrate until the largest particle force in the system falls below a specific threshold value `FMAX`,\n",
+ "chosen in such a way that integrating the system with that force would lead to a displacement inferior or equal to 1% of the particle diameter."
]
},
{
@@ -560,35 +556,18 @@
"metadata": {},
"outputs": [],
"source": [
- "# SOLUTION CELL\n",
- "# Set up steepest descent integration\n",
- "system.integrator.set_steepest_descent(f_max=0, # use a relative convergence criterion only\n",
- " gamma=DAMPING,\n",
- " max_displacement=MAX_DISPLACEMENT)\n",
+ "MASS = 1.0\n",
+ "FMAX = 0.01 * LJ_SIG * MASS / system.time_step**2\n",
"\n",
- "# Initialize integrator to obtain initial forces\n",
- "system.integrator.run(0)\n",
- "old_force = np.max(np.linalg.norm(system.part.all().f, axis=1))\n",
+ "system.integrator.set_steepest_descent(\n",
+ " f_max=FMAX,\n",
+ " gamma=10,\n",
+ " max_displacement=0.01)\n",
"\n",
- "\n",
- "while system.time / system.time_step < MAX_STEPS:\n",
- " system.integrator.run(EM_STEP)\n",
- " force = np.max(np.linalg.norm(system.part.all().f, axis=1))\n",
- " rel_force = np.abs((force - old_force) / old_force)\n",
- " print(f'rel. force change: {rel_force:.2e}')\n",
- " if rel_force < F_TOL:\n",
- " break\n",
- " old_force = force"
+ "system.integrator.run(200)\n",
+ "assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\""
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "62f80d15",
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": null,
diff --git a/doc/tutorials/polymers/polymers.ipynb b/doc/tutorials/polymers/polymers.ipynb
index 18320e7aeea..30760f3803f 100644
--- a/doc/tutorials/polymers/polymers.ipynb
+++ b/doc/tutorials/polymers/polymers.ipynb
@@ -147,7 +147,7 @@
"```\n",
"\n",
"creates a Lennard-Jones interaction with $\\varepsilon=1.$, $\\sigma=1.$,\n",
- "$r_{\\text{cut}} = 1.1225$ and $\\varepsilon_{\\text{shift}}=0.25$ between particles\n",
+ "$r_{\\text{cut}} = 2^{1/6} \\simeq 1.1225$ and $\\varepsilon_{\\text{shift}}=0.25$ between particles\n",
"of type 0, which is the desired repulsive interaction. The command\n",
"\n",
"```python\n",
@@ -349,6 +349,7 @@
"STEPS = 100\n",
"KT = 1.0\n",
"GAMMA = 5.0\n",
+ "MASS = 1.0\n",
"POLYMER_PARAMS = {'n_polymers': 1, 'bond_length': 1, 'seed': 42, 'min_distance': 0.9}\n",
"POLYMER_MODEL = 'Rouse'\n",
"assert POLYMER_MODEL in ('Rouse', 'Zimm')\n",
@@ -358,11 +359,15 @@
"\n",
"# System setup\n",
"system = espressomd.System(box_l=3 * [BOX_L])\n",
+ "system.time_step = TIME_STEP\n",
"system.cell_system.skin = 0.4\n",
"\n",
"# Lennard-Jones interaction\n",
+ "LJ_SIGMA=1.0\n",
+ "LJ_EPSILON=1.0\n",
+ "LJ_CUTOFF=2.0**(1.0 / 6.0)\n",
"system.non_bonded_inter[0, 0].lennard_jones.set_params(\n",
- " epsilon=1.0, sigma=1.0, shift=\"auto\", cutoff=2.0**(1.0 / 6.0))\n",
+ " epsilon=LJ_EPSILON, sigma=LJ_SIGMA, shift=\"auto\", cutoff=LJ_CUTOFF)\n",
"\n",
"# Fene interaction\n",
"fene = espressomd.interactions.FeneBond(k=7, r_0=1, d_r_max=2)\n",
@@ -377,21 +382,23 @@
"rh_results = []\n",
"rf_results = []\n",
"rg_results = []\n",
- "for index, N in enumerate(N_MONOMERS):\n",
+ "for N in N_MONOMERS:\n",
" logging.info(f\"Polymer size: {N}\")\n",
" build_polymer(system, N, POLYMER_PARAMS, fene)\n",
"\n",
- " logging.info(\"Warming up the polymer chain.\")\n",
- " system.time_step = 0.002\n",
+ " logging.info(\"Removing overlaps ...\")\n",
+ " FMAX = 0.001 * LJ_SIGMA * MASS / system.time_step**2\n",
" system.integrator.set_steepest_descent(\n",
- " f_max=1.0,\n",
+ " f_max=FMAX,\n",
" gamma=10,\n",
" max_displacement=0.01)\n",
- " system.integrator.run(2000)\n",
+ "\n",
+ " system.integrator.run(100)\n",
+ " assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\"\n",
" system.integrator.set_vv()\n",
- " logging.info(\"Warmup finished.\")\n",
+ " logging.info(\"Remove overlap finished.\")\n",
"\n",
- " logging.info(\"Equilibration.\")\n",
+ " logging.info(\"Equilibration ...\")\n",
" system.time_step = TIME_STEP\n",
" system.thermostat.set_langevin(kT=1.0, gamma=50, seed=42)\n",
" system.integrator.run(2000)\n",
@@ -404,7 +411,7 @@
" elif POLYMER_MODEL == 'Zimm':\n",
" solvent_lbm(system, KT, GAMMA)\n",
"\n",
- " logging.info(\"Warming up the system with the fluid.\")\n",
+ " logging.info(\"Warming up the system with the fluid ...\")\n",
" system.integrator.run(1000)\n",
" logging.info(\"Warming up the system with the fluid finished.\")\n",
"\n",
@@ -553,8 +560,7 @@
" popt, _ = scipy.optimize.curve_fit(\n",
" lambda x, a, b: kirkwood_zimm(x, a, b, rh_exponent), n_monomers, diffusion)\n",
" y = kirkwood_zimm(x, popt[0], popt[1], rh_exponent)\n",
- " label = f'''\\\n",
- " $D^{{\\\\mathrm{{fit}}}} = \\\n",
+ " label = f'''$D^{{\\\\mathrm{{fit}}}} = \\\n",
" \\\\frac{{{popt[0]:.2f}}}{{N}} + \\\n",
" \\\\frac{{{popt[1] * 6 * np.pi:.3f} }}{{6\\\\pi}} \\\\cdot \\\n",
" \\\\frac{{{1}}}{{N^{{{rh_exponent:.2f}}}}}$ \\\n",
@@ -651,7 +657,7 @@
" ls='', marker='o', capsize=5, capthick=1,\n",
" label=r'$R_g^{\\mathrm{simulation}}$')\n",
"plt.xlabel('Number of monomers $N$')\n",
- "plt.ylabel('Radius of gyration [$\\sigma$]')\n",
+ "plt.ylabel(r'Radius of gyration [$\\sigma$]')\n",
"plt.legend()\n",
"plt.show()"
]
@@ -699,9 +705,7 @@
"cell_type": "code",
"execution_count": null,
"id": "06b65488",
- "metadata": {
- "scrolled": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"rh_summary = standard_error_mean_autocorrelation(rh_results, r'\\operatorname{acf}(R_h)')\n",
@@ -717,7 +721,7 @@
" ls='', marker='o', capsize=5, capthick=1,\n",
" label=r'$R_h^{\\mathrm{simulation}}$')\n",
"plt.xlabel('Number of monomers $N$')\n",
- "plt.ylabel('Hydrodynamic radius [$\\sigma$]')\n",
+ "plt.ylabel(r'Hydrodynamic radius [$\\sigma$]')\n",
"plt.legend()\n",
"plt.show()"
]
diff --git a/doc/tutorials/widom_insertion/widom_insertion.ipynb b/doc/tutorials/widom_insertion/widom_insertion.ipynb
index 7e3dff1e60c..04e9140afd7 100644
--- a/doc/tutorials/widom_insertion/widom_insertion.ipynb
+++ b/doc/tutorials/widom_insertion/widom_insertion.ipynb
@@ -173,6 +173,7 @@
"\n",
"# number of salt ion pairs\n",
"N_ION_PAIRS = 50\n",
+ "MASS=1.0\n",
"\n",
"# particle types and charges\n",
"types = {\n",
@@ -307,10 +308,15 @@
"source": [
"# SOLUTION CELL\n",
"def warmup():\n",
- " system.integrator.set_steepest_descent(f_max=0, gamma=1e-3, max_displacement=0.01)\n",
- "\n",
- " print(\"Removing overlaps...\", flush=True)\n",
- " system.integrator.run(10000)\n",
+ " FMAX = 0.01 * LJ_SIGMA * MASS / system.time_step**2\n",
+ "\n",
+ " system.integrator.set_steepest_descent(\n",
+ " f_max=FMAX,\n",
+ " gamma=1e-3,\n",
+ " max_displacement=0.01)\n",
+ " print(\"Remove overlaps...\", flush=True)\n",
+ " system.integrator.run(5000)\n",
+ " assert np.all(np.abs(system.part.all().f) < FMAX), \"Overlap removal did not converge!\"\n",
"\n",
" system.integrator.set_vv()\n",
" system.thermostat.set_langevin(kT=KT, gamma=GAMMA, seed=42)\n",
diff --git a/maintainer/CI/build_cmake.sh b/maintainer/CI/build_cmake.sh
index 7a95caaae96..4d4ce68d157 100755
--- a/maintainer/CI/build_cmake.sh
+++ b/maintainer/CI/build_cmake.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
#
-# Copyright (C) 2016-2022 The ESPResSo project
+# Copyright (C) 2016-2024 The ESPResSo project
# Copyright (C) 2014 Olaf Lenz
#
# Copying and distribution of this file, with or without modification,
@@ -120,7 +120,6 @@ set_default_value make_check_benchmarks false
set_default_value with_fast_math false
set_default_value with_cuda false
set_default_value with_cuda_compiler "nvcc"
-set_default_value with_cxx_standard 17
set_default_value build_type "RelWithAssert"
set_default_value with_ccache false
set_default_value with_hdf5 true
@@ -130,7 +129,7 @@ set_default_value with_scafacos false
set_default_value with_walberla false
set_default_value with_walberla_avx false
set_default_value with_stokesian_dynamics false
-set_default_value test_timeout 300
+set_default_value test_timeout 500
set_default_value hide_gpu false
set_default_value mpiexec_preflags ""
@@ -148,54 +147,22 @@ if [ "${with_fast_math}" = true ]; then
cmake_param_protected="-DCMAKE_CXX_FLAGS=-ffast-math"
fi
-cmake_params="-D CMAKE_BUILD_TYPE=${build_type} -D CMAKE_CXX_STANDARD=${with_cxx_standard} -D ESPRESSO_WARNINGS_ARE_ERRORS=ON ${cmake_params}"
+cmake_params="-D CMAKE_BUILD_TYPE=${build_type} -D ESPRESSO_WARNINGS_ARE_ERRORS=ON ${cmake_params}"
cmake_params="${cmake_params} -D CMAKE_INSTALL_PREFIX=/tmp/espresso-unit-tests -D ESPRESSO_INSIDE_DOCKER=ON"
cmake_params="${cmake_params} -D ESPRESSO_CTEST_ARGS:STRING=-j${check_procs} -D ESPRESSO_TEST_TIMEOUT=${test_timeout}"
-if [ "${make_check_benchmarks}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_BENCHMARKS=ON"
-fi
-
-if [ "${with_ccache}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CCACHE=ON"
-fi
-
-if [ "${with_caliper}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CALIPER=ON"
-fi
-
-if [ "${with_hdf5}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=OFF"
-fi
-
-if [ "${with_fftw}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=OFF"
-fi
-
-if [ "${with_gsl}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=OFF"
-fi
-
-if [ "${with_scafacos}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=OFF"
-fi
-
-if [ "${with_stokesian_dynamics}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=OFF"
-fi
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_BENCHMARKS=${make_check_benchmarks}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CCACHE=${with_ccache}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CALIPER=${with_caliper}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=${with_hdf5}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=${with_fftw}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=${with_gsl}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=${with_scafacos}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=${with_stokesian_dynamics}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=${with_walberla}"
if [ "${with_walberla}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON"
+ cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON"
if [ "${with_walberla_avx}" = true ]; then
cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON"
fi
@@ -204,39 +171,18 @@ if [ "${with_walberla}" = true ]; then
mpiexec_preflags="${mpiexec_preflags:+$mpiexec_preflags;}--bind-to;none"
fi
-if [ "${with_coverage}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE=ON ${cmake_params}"
-fi
-
-if [ "${with_coverage_python}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE_PYTHON=ON ${cmake_params}"
-fi
-
-if [ "${with_asan}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_ASAN=ON ${cmake_params}"
-fi
-
-if [ "${with_ubsan}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_UBSAN=ON ${cmake_params}"
-fi
-
-if [ "${with_static_analysis}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_CLANG_TIDY=ON ${cmake_params}"
-fi
-
-if [ "${run_checks}" = true ]; then
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_TESTS=ON"
-else
- cmake_params="${cmake_params} -D ESPRESSO_BUILD_TESTS=OFF"
-fi
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_COVERAGE=${with_coverage}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_COVERAGE_PYTHON=${with_coverage_python}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_ASAN=${with_asan}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_UBSAN=${with_ubsan}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CLANG_TIDY=${with_static_analysis}"
+cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CUDA=${with_cuda}"
if [ "${with_cuda}" = true ]; then
- cmake_params="-D ESPRESSO_BUILD_WITH_CUDA=ON -D CUDAToolkit_ROOT=/usr/lib/cuda ${cmake_params}"
- if [ "${CUDACXX}" = "" ]; then
- cmake_params="-D CMAKE_CUDA_FLAGS='--compiler-bindir=/usr/bin/g++-10' ${cmake_params}"
+ cmake_params="${cmake_params} -D CUDAToolkit_ROOT=/usr/lib/cuda"
+ if [ "${CUDACXX}" = "" ] && [ "${CXX}" != "" ]; then
+ cmake_params="${cmake_params} -D CMAKE_CUDA_FLAGS='--compiler-bindir=$(which "${CXX}")'"
fi
-else
- cmake_params="-D ESPRESSO_BUILD_WITH_CUDA=OFF ${cmake_params}"
fi
command -v nvidia-smi && nvidia-smi || true
@@ -264,19 +210,28 @@ cd "${builddir}"
# load MPI module if necessary
if [ -f "/etc/os-release" ]; then
- grep -q suse /etc/os-release && . /etc/profile.d/modules.sh && module load gnu-openmpi
- grep -q 'rhel\|fedora' /etc/os-release && for f in /etc/profile.d/*module*.sh; do . "${f}"; done && module load mpi
- grep -q "Ubuntu 22.04" /etc/os-release && export MPIEXEC_PREFLAGS="--mca;btl_vader_single_copy_mechanism;none${mpiexec_preflags:+;$mpiexec_preflags}"
+ grep -q "suse" /etc/os-release && . /etc/profile.d/modules.sh && module load gnu-openmpi
+ grep -q "rhel\|fedora" /etc/os-release && for f in /etc/profile.d/*module*.sh; do . "${f}"; done && module load mpi
+fi
+
+# setup environment
+if grep -q "Ubuntu" /etc/os-release; then
+ default_gcov="$(which "gcov")"
+ custom_gcov="$(which "${GCOV:-gcov}")"
+ if [ ! "${custom_gcov}" = "${default_gcov}" ] && [ -d "${HOME}/.local/var/lib/alternatives" ]; then
+ update-alternatives --altdir "${HOME}/.local/etc/alternatives" \
+ --admindir "${HOME}/.local/var/lib/alternatives" \
+ --install "${HOME}/.local/bin/gcov" "gcov" "${custom_gcov}" 10
+ fi
fi
# CONFIGURE
start "CONFIGURE"
-MYCONFIG_DIR="${srcdir}/maintainer/configs"
if [ "${myconfig}" = "default" ]; then
echo "Using default myconfig."
else
- myconfig_file="${MYCONFIG_DIR}/${myconfig}.hpp"
+ myconfig_file="${srcdir}/maintainer/configs/${myconfig}.hpp"
if [ ! -e "${myconfig_file}" ]; then
echo "${myconfig_file} does not exist!"
exit 1
@@ -306,9 +261,8 @@ end "BUILD"
# library. See details in https://github.com/espressomd/espresso/issues/2249
# Can't do this check on CUDA though because nvcc creates a host function
# that just calls exit() for each device function, and can't do this with
-# coverage because gcov 9.0 adds code that calls exit(), and can't do this
# with walberla because the library calls exit() in assertions.
-if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then
+if [[ ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then
if nm -o -C $(find . -name '*.so') | grep '[^a-z]exit@@GLIBC'; then
echo "Found calls to exit() function in shared libraries."
exit 1
@@ -400,12 +354,7 @@ if [ "${with_coverage}" = true ] || [ "${with_coverage_python}" = true ]; then
if [ "${with_coverage}" = true ]; then
echo "Running lcov and gcov..."
codecov_opts="${codecov_opts} --gcov"
- lcov --gcov-tool "${GCOV:-gcov}" -q --directory . --ignore-errors graph --capture --output-file coverage.info # capture coverage info
- lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '/usr/*' --output-file coverage.info # filter out system
- lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info '*/doc/*' --output-file coverage.info # filter out docs
- if [ -d _deps/ ]; then
- lcov --gcov-tool "${GCOV:-gcov}" -q --remove coverage.info $(realpath _deps/)'/*' --output-file coverage.info # filter out external projects
- fi
+ "${srcdir}/maintainer/CI/run_lcov.sh" coverage.info
fi
if [ "${with_coverage_python}" = true ]; then
echo "Running python3-coverage..."
diff --git a/maintainer/CI/doc_warnings.sh b/maintainer/CI/doc_warnings.sh
index dc084d1313d..d9f4eac093a 100755
--- a/maintainer/CI/doc_warnings.sh
+++ b/maintainer/CI/doc_warnings.sh
@@ -31,7 +31,7 @@
# not enclosed within <a> tags. Sphinx doesn't use line
# wrapping, so these broken links can be found via text search. The first
# negative lookahead filters out common Python types (for performance reasons).
-regex_sphinx_broken_link='(?!(int|float|complex|bool|str|bytes|array|bytearray|memoryview|object|list|tuple|range|slice|dict|set|frozenset|(?:numpy\.|np\.)?(?:nd)?array)<)[^<>]+?
(?!)'
+regex_sphinx_broken_link='(?!(int|float|complex|bool|str|bytes|array|bytearray|memoryview|object|list|tuple|range|slice|dict|set|frozenset|(?:numpy\.|np\.)?(?:nd)?array|EnumType|IntEnum|StrEnum|ReprEnum|Enum|IntFlag|Flag)<)[^<>]+?
(?!)'
if [ ! -f doc/sphinx/html/index.html ]; then
echo "Please run Sphinx first."
diff --git a/maintainer/CI/jupyter_warnings.py b/maintainer/CI/jupyter_warnings.py
index 520754b73ec..8c7b6ec161a 100755
--- a/maintainer/CI/jupyter_warnings.py
+++ b/maintainer/CI/jupyter_warnings.py
@@ -23,9 +23,11 @@
"""
import sys
+import json
import pathlib
import lxml.etree
+import jupyter_core.paths
import nbformat
import nbconvert
@@ -58,7 +60,15 @@ def detect_invalid_urls(nb, build_root='.', html_exporter=None):
'''
# convert notebooks to HTML
if html_exporter is None:
- html_exporter = nbconvert.HTMLExporter()
+ kwargs = {}
+ for path in jupyter_core.paths.jupyter_config_path():
+ filepath = pathlib.Path(path) / "jupyter_nbconvert_config.json"
+ if filepath.is_file():
+ with open(filepath) as f:
+ config = json.load(f)
+ kwargs = config.get("HTMLExporter", {})
+ break
+ html_exporter = nbconvert.HTMLExporter(**kwargs)
html_exporter.template_name = 'classic'
html_string = html_exporter.from_notebook_node(nb)[0]
# parse HTML
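
For reference, a hedged sketch of the lookup the new code performs: the first `jupyter_nbconvert_config.json` found on the Jupyter config path is read, and its `HTMLExporter` section is forwarded as keyword arguments. The `sanitize_html` option below is a hypothetical example value.

```python
import json
import pathlib
import tempfile

# Hypothetical config file; the loop above reads the "HTMLExporter" section.
config = {"HTMLExporter": {"sanitize_html": False}}

with tempfile.TemporaryDirectory() as tmp:
    filepath = pathlib.Path(tmp) / "jupyter_nbconvert_config.json"
    filepath.write_text(json.dumps(config))
    with open(filepath) as f:
        kwargs = json.load(f).get("HTMLExporter", {})
    print(kwargs)  # {'sanitize_html': False}
```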
diff --git a/maintainer/CI/run_lcov.sh b/maintainer/CI/run_lcov.sh
new file mode 100755
index 00000000000..22052acf45b
--- /dev/null
+++ b/maintainer/CI/run_lcov.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env sh
+#
+# Copyright (C) 2017-2024 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+set -e
+
+output="${1:-coverage.info}"
+bindir="$(realpath .)"
+srcdir="$(sed -nr "s/^ESPResSo_SOURCE_DIR:STATIC=(.+)/\1/p" "${bindir}/CMakeCache.txt")"
+
+if [ "${srcdir}" = "" ]; then
+ echo "Cannot extract ESPResSo_SOURCE_DIR variable from the CMake cache" >&2
+ exit 2
+fi
+
+lcov --gcov-tool "${GCOV:-gcov}" \
+ --quiet \
+ --ignore-errors graph,mismatch,mismatch,gcov,unused \
+ --directory . \
+ --filter brace,blank,range,region \
+ --capture \
+ --rc lcov_json_module="JSON::XS" \
+ --exclude "/usr/*" \
+ --exclude "*/tmpxft_*cudafe1.stub.*" \
+ --exclude "${bindir}/_deps/*" \
+ --exclude "${bindir}/src/python/espressomd/*" \
+ --exclude "${srcdir}/src/walberla_bridge/src/*/generated_kernels/*" \
+ --exclude "${srcdir}/libs/*" \
+ --output-file "${output}"
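
The `sed` call above pulls the source directory out of the CMake cache; a minimal Python equivalent, with a made-up cache excerpt standing in for `CMakeCache.txt`, looks like this:

```python
import re

# Stand-in for the content of CMakeCache.txt in the build directory.
cache_text = """\
//Source directory.
ESPResSo_SOURCE_DIR:STATIC=/home/user/espresso
"""
match = re.search(r"^ESPResSo_SOURCE_DIR:STATIC=(.+)$", cache_text, re.MULTILINE)
srcdir = match.group(1) if match else ""
print(srcdir)  # /home/user/espresso
```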
diff --git a/maintainer/benchmarks/benchmarks.py b/maintainer/benchmarks/benchmarks.py
index 6182c9eafa1..3d39d2fd1dc 100644
--- a/maintainer/benchmarks/benchmarks.py
+++ b/maintainer/benchmarks/benchmarks.py
@@ -84,7 +84,7 @@ def get_timings(system, n_steps, n_iterations, verbose=True):
energy = system.analysis.energy()["total"]
verlet = system.cell_system.get_state()["verlet_reuse"]
print(
- f"step {i}, time: {1000*t:.1f} ms, verlet: {verlet:.2f}, energy: {energy:.2e}")
+ f"step {i}, time: {1000 * t:.1f} ms, verlet: {verlet:.2f}, energy: {energy:.2e}")
return np.array(timings)
@@ -131,7 +131,7 @@ def write_report(filepath, n_proc, timings, n_steps, label=''):
cmd = " ".join(x for x in sys.argv[1:] if not x.startswith("--output"))
avg, ci = get_average_time(timings)
header = '"script","arguments","cores","mean","ci","nsteps","duration","label"\n'
- report = f'"{script}","{cmd}",{n_proc},{avg:.3e},{ci:.3e},{n_steps},{np.sum(timings):.1f},"{label}"\n'
+ report = f'"{script}","{cmd}",{n_proc},{avg:.3e},{ci:.3e},{n_steps},{np.sum(timings):.1f},"{label}"\n' # nopep8
if pathlib.Path(filepath).is_file():
header = ''
with open(filepath, "a") as f:
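
For illustration, here is how such a CSV row is assembled. The numbers are placeholders, and the confidence interval shown assumes a normal approximation, which may differ from what `get_average_time()` actually computes:

```python
import numpy as np

script, cmd, n_proc, n_steps, label = "lj.py", "--particles_per_core=1000", 4, 100, "test"
timings = np.array([1.5e-3, 1.6e-3, 1.4e-3])  # placeholder per-step timings
avg = np.mean(timings)
ci = 1.96 * np.std(timings) / np.sqrt(len(timings))  # assumed 95% CI
report = f'"{script}","{cmd}",{n_proc},{avg:.3e},{ci:.3e},{n_steps},{np.sum(timings):.1f},"{label}"\n'
print(report)
```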
diff --git a/maintainer/benchmarks/mc_acid_base_reservoir.py b/maintainer/benchmarks/mc_acid_base_reservoir.py
index edecf9bb68f..1eb87804fc2 100644
--- a/maintainer/benchmarks/mc_acid_base_reservoir.py
+++ b/maintainer/benchmarks/mc_acid_base_reservoir.py
@@ -24,7 +24,6 @@
import espressomd
import espressomd.electrostatics
import espressomd.reaction_methods
-import pkg_resources
import argparse
parser = argparse.ArgumentParser(description="Benchmark MC simulations in the grand-reaction ensemble. "
@@ -45,8 +44,6 @@
# process and check arguments
assert args.particles_per_core >= 100, "you need to use at least 100 particles per core to avoid finite-size effects in the simulation"
espressomd.assert_features(['WCA', 'ELECTROSTATICS'])
-assert pkg_resources.packaging.specifiers.SpecifierSet('>=0.10.1').contains(pint.__version__), \
- f'pint version {pint.__version__} is too old: several numpy operations can cast away the unit'
def calc_ideal_alpha(pH, pKa):
@@ -96,7 +93,7 @@ def calc_donnan_coefficient(c_acid, I_res, charge=-1):
NUM_SAMPLES = 100
INTEGRATION_STEPS_PER_SAMPLE = 100
assert TOTAL_NUM_MC_STEPS % NUM_SAMPLES == 0, \
- f"Total number of MC steps must be divisible by total number of samples, got {TOTAL_NUM_MC_STEPS} and {NUM_SAMPLES}"
+ f"Total number of MC steps must be divisible by total number of samples, got {TOTAL_NUM_MC_STEPS} and {NUM_SAMPLES}" # nopep8
MC_STEPS_PER_SAMPLE = TOTAL_NUM_MC_STEPS // NUM_SAMPLES
# definitions of reduced units
@@ -269,7 +266,8 @@ def report_progress(system, i, next_i):
n_All = len(system.part)
if i == next_i:
print(
- f"run {i:d} time {system.time:.3g} completed {i / NUM_SAMPLES * 100:.0f}%",
+ f"run {i:d} time {system.time:.3g} completed "
+ f"{i / NUM_SAMPLES * 100:.0f}%",
f"instantaneous values: All {n_All:d} Na {n_Na:d} Cl {n_Cl:d}",
f"A {n_A:d} alpha {n_A / N_ACID:.3f}")
if i == 0:
@@ -311,7 +309,8 @@ def report_progress(system, i, next_i):
energy = system.analysis.energy()["total"]
verlet = system.cell_system.get_state()["verlet_reuse"]
print(
- f"step {i}, time MD: {t_MD:.2e}, time MC: {t_MC:.2e}, verlet: {verlet:.2f}, energy: {energy:.2e}")
+ f"step {i}, time MD: {t_MD:.2e}, time MC: {t_MC:.2e}, "
+ f"verlet: {verlet:.2f}, energy: {energy:.2e}")
# average time
avg_MC, ci_MC = benchmarks.get_average_time(timings_MC)
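
The `calc_ideal_alpha()` helper referenced above presumably evaluates the Henderson-Hasselbalch relation; a minimal sketch consistent with its signature:

```python
def calc_ideal_alpha(pH, pKa):
    """Ideal degree of ionization from the Henderson-Hasselbalch equation."""
    return 1. / (1. + 10.**(pKa - pH))

print(calc_ideal_alpha(pH=7., pKa=7.))  # 0.5: half-ionized at pH == pKa
```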
diff --git a/maintainer/check_features.py b/maintainer/check_features.py
index 05a411e97ef..a102c0d1ee6 100755
--- a/maintainer/check_features.py
+++ b/maintainer/check_features.py
@@ -26,7 +26,7 @@
import featuredefs
if len(sys.argv) != 2:
- print("Usage: %s FILE" % sys.argv[0])
+ print(f"Usage: {sys.argv[0]} FILE")
exit(2)
fdefs = featuredefs.defs(sys.argv[1])
diff --git a/maintainer/configs/maxset.hpp b/maintainer/configs/maxset.hpp
index ccce79f02cf..de5775c1bf7 100644
--- a/maintainer/configs/maxset.hpp
+++ b/maintainer/configs/maxset.hpp
@@ -28,9 +28,6 @@
#define DPD
#define ELECTROSTATICS
-#ifdef CUDA
-#define MMM1D_GPU
-#endif
#define DIPOLES
#ifdef SCAFACOS
#define SCAFACOS_DIPOLES
diff --git a/maintainer/format/autopep8.sh b/maintainer/format/autopep8.sh
index c59ee8d694d..175910909f9 100755
--- a/maintainer/format/autopep8.sh
+++ b/maintainer/format/autopep8.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+#
# Copyright (C) 2018-2022 The ESPResSo project
#
# This file is part of ESPResSo.
@@ -15,10 +16,11 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
-AUTOPEP8_VER=1.6.0
-PYCODESTYLE_VER=2.8.0
+AUTOPEP8_VER=2.1.0
+PYCODESTYLE_VER=2.11.1
python3 -m autopep8 --help 2>&1 > /dev/null
if [ "$?" = "0" ]; then
diff --git a/maintainer/format/clang-format.sh b/maintainer/format/clang-format.sh
index 0ee45b41a6c..347c9cbb879 100755
--- a/maintainer/format/clang-format.sh
+++ b/maintainer/format/clang-format.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+#
# Copyright (C) 2018-2022 The ESPResSo project
#
# This file is part of ESPResSo.
@@ -15,8 +16,9 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
-CLANG_FORMAT_VER=14.0
+CLANG_FORMAT_VER=18.1
if hash clang-format-${CLANG_FORMAT_VER} 2>/dev/null; then
CLANGFORMAT="$(which clang-format-${CLANG_FORMAT_VER})"
elif hash clang-format-${CLANG_FORMAT_VER%.*} 2>/dev/null; then
diff --git a/maintainer/format/cmake-format.sh b/maintainer/format/cmake-format.sh
index edc73cf6795..e00ea203157 100755
--- a/maintainer/format/cmake-format.sh
+++ b/maintainer/format/cmake-format.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+#
# Copyright (C) 2018-2022 The ESPResSo project
#
# This file is part of ESPResSo.
@@ -15,6 +16,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
CMAKE_FORMAT_VER=0.6.13
python3 -m cmakelang.format 2>&1 > /dev/null
diff --git a/maintainer/format/ex_flag.sh b/maintainer/format/ex_flag.sh
index b14c9356637..6b58e2a4bdc 100755
--- a/maintainer/format/ex_flag.sh
+++ b/maintainer/format/ex_flag.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+#
# Copyright (C) 2018-2022 The ESPResSo project
#
# This file is part of ESPResSo.
@@ -15,6 +16,6 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
+#
chmod -x "$@"
diff --git a/maintainer/lint/pre_commit.sh b/maintainer/lint/pre_commit.sh
index 96eb4f9ffaf..3d7b2d4d445 100755
--- a/maintainer/lint/pre_commit.sh
+++ b/maintainer/lint/pre_commit.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+#
# Copyright (C) 2018-2022 The ESPResSo project
#
# This file is part of ESPResSo.
@@ -15,9 +16,9 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
-
-python3 -m pre_commit 2>&1 >/dev/null
+pre-commit 2>&1 >/dev/null
if [ "$?" = "0" ]; then
precommit="python3 -m pre_commit"
else
diff --git a/maintainer/lint/pylint.sh b/maintainer/lint/pylint.sh
index fc17a66e654..f8ca595a97c 100755
--- a/maintainer/lint/pylint.sh
+++ b/maintainer/lint/pylint.sh
@@ -1,5 +1,6 @@
#!/bin/sh
-# Copyright (C) 2018-2022 The ESPResSo project
+#
+# Copyright (C) 2018-2024 The ESPResSo project
#
# This file is part of ESPResSo.
#
@@ -15,6 +16,7 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
python3 -m pylint --help 2>&1 > /dev/null
diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md
index 37ecd2a7cea..ef7f2fb0c5e 100644
--- a/maintainer/walberla_kernels/Readme.md
+++ b/maintainer/walberla_kernels/Readme.md
@@ -20,7 +20,7 @@ The kernels can be regenerated with this shell script:
```sh
# adapt these paths to the build environment
-export VERSION=1.2
+export VERSION=1.3.3
export DEPS="${HOME}/walberla_deps"
export PYTHONPATH="${DEPS}/${VERSION}/lbmpy:${DEPS}/${VERSION}/pystencils:${DEPS}/devel/walberla/python/"
@@ -34,6 +34,8 @@ function generate_ek_kernels {
function format_lb_kernels {
$(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h
$(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cpp -style "{Language: Cpp, ColumnLimit: 0}"
+ $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cu -style "{Language: Cpp, ColumnLimit: 0}"
+ $(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.cuh -style "{Language: Cpp}"
}
function format_ek_kernels {
$(git rev-parse --show-toplevel)/maintainer/format/clang-format.sh -i *.h
@@ -44,7 +46,10 @@ function format_ek_kernels {
cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/
generate_lb_kernels
generate_lb_kernels --single-precision
+generate_lb_kernels --gpu
+generate_lb_kernels --gpu --single-precision
format_lb_kernels
+git diff src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_*CUDA*.cu # verify pragmas
# EK kernels
cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/
@@ -54,6 +59,10 @@ format_ek_kernels
mv ReactionKernel*.{cpp,h} $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/reactions/generated_kernels/
```
+The code generation is not deterministic; therefore, the list of changes might
+be quite large. If you only adapted a few lines in a specific template file,
+then you only need to commit the corresponding output files.
+
WARNING: The code generation sorts the arguments alphabetically by symbol name.
If you rename something, you may have to adapt the order of arguments in the
calling code!
diff --git a/maintainer/walberla_kernels/code_generation_context.py b/maintainer/walberla_kernels/code_generation_context.py
index 583a3fca578..77f86183e0e 100644
--- a/maintainer/walberla_kernels/code_generation_context.py
+++ b/maintainer/walberla_kernels/code_generation_context.py
@@ -19,7 +19,6 @@
import os
import re
-import jinja2
import hashlib
import lbmpy
import lbmpy_walberla
@@ -27,48 +26,6 @@
import pystencils_walberla
-def adapt_pystencils():
- """
- Adapt pystencils to the SFINAE method (add the block offset lambda
- callback and the time_step increment).
- """
- old_add_pystencils_filters_to_jinja_env = pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env
-
- def new_add_pystencils_filters_to_jinja_env(jinja_env):
- # save original pystencils to adapt
- old_add_pystencils_filters_to_jinja_env(jinja_env)
- old_generate_members = jinja_env.filters["generate_members"]
- old_generate_refs_for_kernel_parameters = jinja_env.filters[
- "generate_refs_for_kernel_parameters"]
-
- @jinja2.pass_context
- def new_generate_members(*args, **kwargs):
- output = old_generate_members(*args, **kwargs)
- token = " block_offset_0_;"
- if token in output:
- i = output.index(token)
- vartype = output[:i].split("\n")[-1].strip()
- output += f"\nstd::function block_offset_generator = [](IBlock * const, {vartype}&, {vartype}&, {vartype}&) {{ }};"
- return output
-
- def new_generate_refs_for_kernel_parameters(*args, **kwargs):
- output = old_generate_refs_for_kernel_parameters(*args, **kwargs)
- if "block_offset_0" in output:
- old_token = "auto & block_offset_"
- new_token = "auto block_offset_"
- assert output.count(old_token) == 3, \
- f"could not find '{old_token}' in '''\n{output}\n'''"
- output = output.replace(old_token, new_token)
- output += "\nblock_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);"
- return output
-
- # replace pystencils
- jinja_env.filters["generate_members"] = new_generate_members
- jinja_env.filters["generate_refs_for_kernel_parameters"] = new_generate_refs_for_kernel_parameters
-
- pystencils_walberla.codegen.add_pystencils_filters_to_jinja_env = new_add_pystencils_filters_to_jinja_env
-
-
def earmark_generated_kernels():
"""
Add an earmark at the beginning of generated kernels to document the
@@ -83,12 +40,13 @@ def earmark_generated_kernels():
walberla_commit = f.read()
token = "// kernel generated with"
earmark = (
- f"{token} pystencils v{pystencils.__version__}, lbmpy v{lbmpy.__version__}, "
- f"lbmpy_walberla/pystencils_walberla from waLBerla commit {walberla_commit}"
+ f"{token} pystencils v{pystencils.__version__}, "
+ f"lbmpy v{lbmpy.__version__}, "
+ f"lbmpy_walberla/pystencils_walberla from "
+ f"waLBerla commit {walberla_commit}"
)
for filename in os.listdir("."):
- if not filename.endswith(
- ".tmpl.h") and filename.endswith((".h", ".cpp", ".cu")):
+ if filename.endswith((".h", ".cpp", ".cu", ".cuh")):
with open(filename, "r+") as f:
content = f.read()
if token not in content:
@@ -100,7 +58,7 @@ def earmark_generated_kernels():
pos = content.find("//=====", 5)
pos = content.find("\n", pos) + 1
f.seek(pos)
- f.write(f"\n{earmark}\n{content[pos:]}")
+ f.write(f"\n{earmark}\n{content[pos:].rstrip()}\n")
def guard_generated_kernels_clang_format():
@@ -117,9 +75,9 @@ def guard_generated_kernels_clang_format():
if not all_ns:
continue
for ns in all_ns:
- content = re.sub(rf"(?<=[^a-zA-Z0-9_]){ns}(?=[^a-zA-Z0-9_])",
- f"internal_{hashlib.md5(ns.encode('utf-8')).hexdigest()}",
- content)
+ ns_hash = hashlib.md5(ns.encode('utf-8')).hexdigest()
+ content = re.sub(f"(?<=[^a-zA-Z0-9_]){ns}(?=[^a-zA-Z0-9_])",
+ f"internal_{ns_hash}", content)
with open(filename, "w") as f:
f.write(content)
@@ -138,7 +96,6 @@ def __init__(self):
sys.argv = sys.argv[:1]
super().__init__()
sys.argv = old_sys_argv
- adapt_pystencils()
def __exit__(self, *args, **kwargs):
super().__exit__(*args, **kwargs)
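
The `guard_generated_kernels_clang_format()` logic above renames generated namespaces to digest-based identifiers, presumably to keep internal symbols unique across generated files; a self-contained sketch of the substitution on a sample snippet:

```python
import hashlib
import re

content = "namespace internal_kernel {\nvoid kernel();\n} // namespace internal_kernel\n"
ns = "internal_kernel"
ns_hash = hashlib.md5(ns.encode('utf-8')).hexdigest()
# replace the namespace wherever it is delimited by non-identifier characters
content = re.sub(f"(?<=[^a-zA-Z0-9_]){ns}(?=[^a-zA-Z0-9_])",
                 f"internal_{ns_hash}", content)
print(content)  # the namespace is now internal_<md5 digest>
```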
diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py
index 3ff0b83cdd2..1b09ea483cf 100644
--- a/maintainer/walberla_kernels/custom_additional_extensions.py
+++ b/maintainer/walberla_kernels/custom_additional_extensions.py
@@ -88,10 +88,8 @@ def __call__(self, field, direction_symbol, index_field, **kwargs):
conds = [
sp.Equality(
direction_symbol,
- ps.typing.CastFunc(
- d + 1,
- np.int32)) for d in range(
- len(accesses))]
+ ps.typing.CastFunc(d + 1, np.int32))
+ for d in range(len(accesses))]
# use conditional
conditional = None
@@ -137,8 +135,7 @@ def __init__(self, stencil, boundary_object):
@property
def constructor_arguments(self):
- return f", std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)>& " \
- "dirichletCallback "
+ return f", std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)>& dirichletCallback " # nopep8
@property
def initialiser_list(self):
@@ -153,15 +150,15 @@ def additional_parameters_for_fill_function(self):
return " const shared_ptr &blocks, "
def data_initialisation(self, _):
- init_list = [f"{self.data_type} InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), "
- "blocks, *block);", "element.value = InitialisatonAdditionalData;"]
+ init_list = [
+ f"{self.data_type} InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);", # nopep8
+ "element.value = InitialisatonAdditionalData;"]
return "\n".join(init_list)
@property
def additional_member_variable(self):
- return f"std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)> " \
- "elementInitaliser; "
+ return f"std::function<{self.data_type}(const Cell &, const shared_ptr&, IBlock&)> elementInitaliser; " # nopep8
class FluxAdditionalDataHandler(
@@ -174,8 +171,7 @@ def __init__(self, stencil, boundary_object):
@property
def constructor_arguments(self):
- return f", std::function(const Cell &, const shared_ptr&, IBlock&)>& " \
- "fluxCallback "
+ return f", std::function(const Cell &, const shared_ptr&, IBlock&)>& fluxCallback " # nopep8
@property
def initialiser_list(self):
@@ -193,8 +189,8 @@ def data_initialisation(self, direction):
dirVec = self.stencil_info[direction][1]
init_list = [
- f"Vector3<{self.data_type}> InitialisatonAdditionalData = elementInitaliser(Cell(it.x() + {dirVec[0]}, it.y() + {dirVec[1]}, it.z() + {dirVec[2]}), "
- "blocks, *block);", "element.flux_0 = InitialisatonAdditionalData[0];",
+ f"Vector3<{self.data_type}> InitialisatonAdditionalData = elementInitaliser(Cell(it.x() + {dirVec[0]}, it.y() + {dirVec[1]}, it.z() + {dirVec[2]}), blocks, *block);", # nopep8
+ "element.flux_0 = InitialisatonAdditionalData[0];",
"element.flux_1 = InitialisatonAdditionalData[1];"]
if self._dim == 3:
init_list.append(
@@ -204,13 +200,11 @@ def data_initialisation(self, direction):
@property
def additional_member_variable(self):
- return f"std::function(const Cell &, const shared_ptr&, IBlock&)> " \
- "elementInitaliser; "
+ return f"std::function(const Cell &, const shared_ptr&, IBlock&)> elementInitaliser; " # nopep8
-# this custom boundary generator is necessary because our boundary
-# condition writes to several fields at once which is impossible with the
-# shipped one
+# this custom boundary generator is necessary because our boundary condition
+# writes to several fields at once which is impossible with the shipped one
def generate_boundary(
generation_context,
stencil,
@@ -250,9 +244,8 @@ def generate_boundary(
index_struct_dtype,
layout=[0],
shape=(
- ps.typing.TypedSymbol(
- "indexVectorSize", ps.typing.BasicType(np.int32)
- ),
+ ps.typing.TypedSymbol("indexVectorSize",
+ ps.typing.BasicType(np.int32)),
1,
),
strides=(1, 1),
diff --git a/maintainer/walberla_kernels/generate_ek_kernels.py b/maintainer/walberla_kernels/generate_ek_kernels.py
index e54f19023f0..54a20a107e7 100644
--- a/maintainer/walberla_kernels/generate_ek_kernels.py
+++ b/maintainer/walberla_kernels/generate_ek_kernels.py
@@ -22,6 +22,7 @@
import sympy as sp
import lbmpy
import argparse
+import packaging.specifiers
import pystencils_espresso
import code_generation_context
@@ -35,6 +36,12 @@
help='Use single-precision')
args = parser.parse_args()
+# Make sure we have the correct versions of the required dependencies
+for module, requirement in [(ps, "==1.2"), (lbmpy, "==1.2")]:
+ assert packaging.specifiers.SpecifierSet(requirement).contains(module.__version__), \
+ f"{module.__name__} version {module.__version__} " \
+ f"doesn't match requirement {requirement}"
+
double_precision: bool = not args.single_precision
data_type_cpp = "double" if double_precision else "float"
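
`packaging.specifiers.SpecifierSet` replaces the deprecated `pkg_resources` API removed elsewhere in this patch; its behavior is easy to verify in isolation:

```python
import packaging.specifiers

spec = packaging.specifiers.SpecifierSet("==1.2")
print(spec.contains("1.2"))    # True
print(spec.contains("1.3.3"))  # False: a pinned version rejects newer releases
```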
diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py
index dc3083450b4..9afd75925c1 100644
--- a/maintainer/walberla_kernels/generate_lb_kernels.py
+++ b/maintainer/walberla_kernels/generate_lb_kernels.py
@@ -18,9 +18,10 @@
#
import argparse
-import pkg_resources
+import packaging.specifiers
import sympy as sp
+import numpy as np
import pystencils as ps
import pystencils_walberla
@@ -52,9 +53,10 @@
target = ps.Target.CPU
# Make sure we have the correct versions of the required dependencies
-for module, requirement in [(ps, "==1.2"), (lbmpy, "==1.2")]:
- assert pkg_resources.packaging.specifiers.SpecifierSet(requirement).contains(module.__version__), \
- f"{module.__name__} version {module.__version__} doesn't match requirement {requirement}"
+for module, requirement in [(ps, "==1.3.3"), (lbmpy, "==1.3.3")]:
+ assert packaging.specifiers.SpecifierSet(requirement).contains(module.__version__), \
+ f"{module.__name__} version {module.__version__} " \
+ f"doesn't match requirement {requirement}"
def paramlist(parameters, keys):
@@ -66,6 +68,7 @@ def paramlist(parameters, keys):
with code_generation_context.CodeGeneration() as ctx:
ctx.double_accuracy = not args.single_precision
if target == ps.Target.GPU:
+ ctx.gpu = True
ctx.cuda = True
# vectorization parameters
@@ -115,7 +118,7 @@ def paramlist(parameters, keys):
# generate initial densities
for params, target_suffix in paramlist(parameters, (default_key,)):
- pystencils_walberla.codegen.generate_sweep(
+ pystencils_walberla.generate_sweep(
ctx,
f"InitialPDFsSetter{precision_prefix}{target_suffix}",
pystencils_espresso.generate_setters(ctx, method, params),
@@ -146,38 +149,44 @@ def paramlist(parameters, keys):
params
)
+ block_offsets = tuple(
+ ps.TypedSymbol(f"block_offset_{i}", np.uint32)
+ for i in range(3))
+
# generate thermalized LB
collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule(
method,
zero_centered=False,
fluctuating={
"temperature": kT,
- "block_offsets": "walberla",
+ "block_offsets": block_offsets,
"rng_node": precision_rng
},
optimization={"cse_global": True,
"double_precision": ctx.double_accuracy}
)
for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")):
+ stem = f"CollideSweep{precision_prefix}Thermalized{target_suffix}"
pystencils_espresso.generate_collision_sweep(
ctx,
method,
collision_rule_thermalized,
- f"CollideSweep{precision_prefix}Thermalized{target_suffix}",
- params
+ stem,
+ params,
+ block_offset=block_offsets,
)
# generate accessors
for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
- filename = f"FieldAccessors{precision_prefix}{target_suffix}"
+ stem = f"FieldAccessors{precision_prefix}{target_suffix}"
if target == ps.Target.GPU:
templates = {
- f"{filename}.h": "templates/FieldAccessors.tmpl.cuh",
- f"{filename}.cu": "templates/FieldAccessors.tmpl.cu",
+ f"{stem}.cuh": "templates/FieldAccessors.tmpl.cuh",
+ f"{stem}.cu": "templates/FieldAccessors.tmpl.cu",
}
else:
templates = {
- f"{filename}.h": "templates/FieldAccessors.tmpl.h",
+ f"{stem}.h": "templates/FieldAccessors.tmpl.h",
}
walberla_lbm_generation.generate_macroscopic_values_accessors(
ctx, config, method, templates
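
The `block_offsets` tuple introduced above turns the RNG block offsets of the thermalized collision sweep into explicitly typed kernel parameters instead of the previous `"walberla"` preset. In isolation (a sketch assuming pystencils and numpy are installed):

```python
import numpy as np
import pystencils as ps

# three uint32 symbols that become parameters of the generated kernel
block_offsets = tuple(
    ps.TypedSymbol(f"block_offset_{i}", np.uint32) for i in range(3))
print(block_offsets)  # (block_offset_0, block_offset_1, block_offset_2)
```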
diff --git a/maintainer/walberla_kernels/lbmpy_espresso.py b/maintainer/walberla_kernels/lbmpy_espresso.py
index 5055fac308c..8a755d347b0 100644
--- a/maintainer/walberla_kernels/lbmpy_espresso.py
+++ b/maintainer/walberla_kernels/lbmpy_espresso.py
@@ -19,8 +19,8 @@
import pystencils as ps
-import lbmpy.advanced_streaming.indexing
import lbmpy.boundaries
+import lbmpy.custom_code_nodes
import lbmpy_walberla.additional_data_handler
@@ -39,15 +39,20 @@ def data_initialisation(self, direction):
This way, the dynamic UBB can be used to implement a LB boundary.
'''
code = super().data_initialisation(direction)
- dirVec = self.stencil_info[direction][1]
- token = ' = elementInitaliser(Cell(it.x(){}, it.y(){}, it.z(){}),'
- old_initialiser = token.format('', '', '')
- assert old_initialiser in code
- new_initialiser = token.format(
- '+' + str(dirVec[0]),
- '+' + str(dirVec[1]),
- '+' + str(dirVec[2])).replace('+-', '-')
- return code.replace(old_initialiser, new_initialiser)
+ assert "InitialisationAdditionalData" in code
+ assert "elementInitialiser" in code
+ assert "element.vel_0" in code
+ bb_vec = self.stencil_info[direction][1]
+ cell_args = [f"it.{direction}() + {bb_vec[i]}".replace('+ -', '-')
+ for i, direction in enumerate("xyz")]
+ code = [
+ "auto const InitialisationAdditionalData = elementInitialiser(",
+ f"Cell({', '.join(cell_args)}), blocks, *block);",
+ "element.vel_0 = InitialisationAdditionalData[0];",
+ "element.vel_1 = InitialisationAdditionalData[1];",
+ "element.vel_2 = InitialisationAdditionalData[2];",
+ ]
+ return "\n".join(code)
class UBB(lbmpy.boundaries.UBB):
@@ -71,7 +76,7 @@ def __call__(self, f_out, f_in, dir_symbol,
if len(assignments) > 1:
out.extend(assignments[:-1])
- neighbor_offset = lbmpy.advanced_streaming.indexing.NeighbourOffsetArrays.neighbour_offset(
+ neighbor_offset = lbmpy.custom_code_nodes.NeighbourOffsetArrays.neighbour_offset(
dir_symbol, lb_method.stencil)
assignment = assignments[-1]
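
The `cell_args` comprehension in the rewritten `data_initialisation()` above stringifies the stencil offset per axis; a standalone check with a sample bounce-back vector:

```python
bb_vec = (1, 0, -1)  # sample stencil offset vector
cell_args = [f"it.{direction}() + {bb_vec[i]}".replace('+ -', '-')
             for i, direction in enumerate("xyz")]
print(cell_args)  # ['it.x() + 1', 'it.y() + 0', 'it.z() -1']
```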
diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py
index 1980ba14387..91350eb7248 100644
--- a/maintainer/walberla_kernels/pystencils_espresso.py
+++ b/maintainer/walberla_kernels/pystencils_espresso.py
@@ -23,12 +23,13 @@
import lbmpy.updatekernels
import pystencils as ps
import pystencils_walberla
+import pystencils_walberla.utility
def skip_philox_unthermalized(code, result_symbols, rng_name):
for r in result_symbols:
statement = f" {r.name};"
- assert statement in code, f"no declaration for variable '{r.name}' in '{code}'"
+ assert statement in code, f"no declaration for variable '{r.name}' in '{code}'" # nopep8
code = code.replace(statement, f" {r.name}{{}};", 1)
statement = f"{rng_name}("
assert code.count(statement) == 1, f"need 1 '{rng_name}' call in '{code}'"
@@ -107,11 +108,11 @@ def generate_fields(config, stencil):
def generate_config(ctx, params):
- return pystencils_walberla.codegen.config_from_context(ctx, **params)
+ return pystencils_walberla.utility.config_from_context(ctx, **params)
def generate_collision_sweep(
- ctx, lb_method, collision_rule, class_name, params):
+ ctx, lb_method, collision_rule, class_name, params, **kwargs):
config = generate_config(ctx, params)
# Symbols for PDF (twice, due to double buffering)
@@ -127,8 +128,8 @@ def generate_collision_sweep(
collide_update_rule, config=config, **params)
collide_ast.function_name = 'kernel_collide'
collide_ast.assumed_inner_stride_one = True
- pystencils_walberla.codegen.generate_sweep(
- ctx, class_name, collide_ast, **params)
+ pystencils_walberla.generate_sweep(
+ ctx, class_name, collide_ast, **params, **kwargs)
def generate_stream_sweep(ctx, lb_method, class_name, params):
@@ -144,7 +145,7 @@ def generate_stream_sweep(ctx, lb_method, class_name, params):
stream_ast = ps.create_kernel(stream_update_rule, config=config, **params)
stream_ast.function_name = 'kernel_stream'
stream_ast.assumed_inner_stride_one = True
- pystencils_walberla.codegen.generate_sweep(
+ pystencils_walberla.generate_sweep(
ctx, class_name, stream_ast,
field_swaps=[(fields['pdfs'], fields['pdfs_tmp'])], **params)
diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
index bdeaf57c06d..a51c9c10b58 100644
--- a/maintainer/walberla_kernels/templates/Boundary.tmpl.h
+++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
@@ -31,8 +31,8 @@
{% if target is equalto 'cpu' -%}
#include
{%- elif target is equalto 'gpu' -%}
-#include
-#include
+#include
+#include
{%- endif %}
#include
#include
@@ -49,12 +49,14 @@
#include {{header}}
{% endfor %}
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#elif defined(__GNUC__) or defined(__GNUG__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
namespace walberla {
@@ -85,7 +87,7 @@ class {{class_name}}
{% if target == 'gpu' -%}
~IndexVectors() {
for( auto & gpuVec: gpuVectors_)
- cudaFree( gpuVec );
+ gpuFree( gpuVec );
}
{% endif -%}
@@ -100,7 +102,7 @@ class {{class_name}}
{
{% if target == 'gpu' -%}
for( auto & gpuVec: gpuVectors_)
- cudaFree( gpuVec );
+ gpuFree( gpuVec );
gpuVectors_.resize( cpuVectors_.size() );
WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
@@ -108,8 +110,8 @@ class {{class_name}}
{
auto & gpuVec = gpuVectors_[i];
auto & cpuVec = cpuVectors_[i];
- cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() );
- cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice );
+ gpuMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() );
+ gpuMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), gpuMemcpyHostToDevice );
}
{%- endif %}
}
@@ -136,12 +138,12 @@ class {{class_name}}
{};
void run (
- {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+ {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
);
{% if generate_functor -%}
void operator() (
- {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+ {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
)
{
run( {{- ["block", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []] | identifier_list -}} );
@@ -149,28 +151,28 @@ class {{class_name}}
{%- endif %}
void inner (
- {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+ {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
);
void outer (
- {{- ["IBlock * block", kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
+ {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
);
- std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+ std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
{
return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
(IBlock * b)
{ this->run( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
}
- std::function<void (IBlock *)> getInnerSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+ std::function<void (IBlock *)> getInnerSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
{
return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
(IBlock * b)
{ this->inner( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
}
- std::function<void (IBlock *)> getOuterSweep( {{- [interface_spec.high_level_args, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+ std::function<void (IBlock *)> getOuterSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
{
return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
(IBlock * b)
@@ -198,7 +200,7 @@ class {{class_name}}
auto * flagField = block->getData< FlagField_T > ( flagFieldID );
{{additional_data_handler.additional_field_data|indent(4)}}
- assert(flagField->flagExists(boundaryFlagUID and
+ assert(flagField->flagExists(boundaryFlagUID) and
flagField->flagExists(domainFlagUID));
auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
@@ -293,7 +295,7 @@ class {{class_name}}
private:
void run_impl(
{{- ["IBlock * block", "IndexVectors::Type type",
- kernel.kernel_selection_parameters, ["cudaStream_t stream = nullptr"] if target == 'gpu' else []]
+ kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []]
| type_identifier_list -}}
);
diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu
new file mode 100644
index 00000000000..c9f8ae4dc97
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu
@@ -0,0 +1,1001 @@
+/*
+ * Copyright (C) 2023-2024 The ESPResSo project
+ * Copyright (C) 2020 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * Lattice field accessors.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+ */
+
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <core/cell/CellInterval.h>
+#include <core/math/Matrix3.h>
+#include <core/math/Vector3.h>
+
+#include <field/iterators/IteratorMacros.h>
+
+#include <gpu/FieldAccessor.h>
+#include <gpu/FieldIndexing.h>
+#include <gpu/GPUField.h>
+#include <gpu/Kernel.h>
+
+#include <thrust/device_ptr.h>
+#include <thrust/device_vector.h>
+
+#include <array>
+#include <vector>
+
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+#endif
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+/** @brief Get linear index of flattened data with original layout @c fzyx. */
+static __forceinline__ __device__ uint getLinearIndex( uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset ) {
+ auto const x = threadIdx.x;
+ auto const y = blockIdx.x;
+ auto const z = blockIdx.y;
+ auto const f = blockIdx.z;
+ auto const ySize = gridDim.x;
+ auto const zSize = gridDim.y;
+ auto const fSize = fOffset;
+ return f +
+ z * fSize +
+ y * fSize * zSize +
+ x * fSize * zSize * ySize ;
+}
+
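
A plain-Python model of `getLinearIndex()` makes the fzyx flattening explicit: `f` varies fastest and `x` slowest.

```python
def linear_index(x, y, z, f, y_size, z_size, f_size):
    # mirrors the device function above
    return f + z * f_size + y * f_size * z_size + x * f_size * z_size * y_size

assert linear_index(0, 0, 0, 1, 4, 4, 19) == 1        # f is contiguous
assert linear_index(0, 0, 1, 0, 4, 4, 19) == 19       # z strides by f_size
assert linear_index(1, 0, 0, 0, 4, 4, 19) == 19 * 16  # x is the slowest axis
```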
+namespace walberla {
+namespace {{namespace}} {
+namespace accessor {
+
+namespace Population
+{
+// LCOV_EXCL_START
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} * RESTRICT pop )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u);
+ pdf.set( blockIdx, threadIdx );
+ pop += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ pop[{{i}}u] = pdf.get({{i}}u);
+ {% endfor -%}
+ }
+ }
+
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} const * RESTRICT pop )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u);
+ pdf.set( blockIdx, threadIdx );
+ pop += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ pdf.get({{i}}u) = pop[{{i}}u];
+ {% endfor -%}
+ }
+ }
+
+ __global__ void kernel_broadcast(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} const * RESTRICT pop )
+ {
+ pdf.set( blockIdx, threadIdx );
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ pdf.get({{i}}u) = pop[{{i}}u];
+ {% endfor -%}
+ }
+ }
+
+ __global__ void kernel_set_vel(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ gpu::FieldAccessor< {{dtype}} > velocity,
+ gpu::FieldAccessor< {{dtype}} > force,
+ {{dtype}} const * RESTRICT pop )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u);
+ pdf.set( blockIdx, threadIdx );
+ velocity.set( blockIdx, threadIdx );
+ force.set( blockIdx, threadIdx );
+ pop += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf.get({{i}}u) = pop[{{i}}u];
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cu | indent(8) }}
+ const {{dtype}} rho_inv = {{dtype}} {1} / rho;
+ {% for i in range(D) -%}
+ velocity.get({{i}}u) = md_{{i}} * rho_inv;
+ {% endfor %}
+ }
+ }
+// LCOV_EXCL_STOP
+
+ std::array<{{dtype}}, {{Q}}u> get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data({{Q}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::array<{{dtype}}, {{Q}}u> pop;
+ thrust::copy(dev_data.begin(), dev_data.end(), pop.data());
+ return pop;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop,
+ Cell const & cell )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ CellInterval ci ( cell, cell );
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop,
+ Cell const & cell )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ CellInterval ci ( cell, cell );
+ auto kernel = gpu::make_kernel( kernel_set_vel );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void initialize(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop )
+ {
+ CellInterval ci = pdf_field->xyzSizeWithGhostLayer();
+ thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_broadcast );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ std::vector< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(ci.numCells() * {{Q}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::vector< {{dtype}} > out(ci.numCells() * {{Q}}u);
+ thrust::copy(dev_data.begin(), dev_data.end(), out.data());
+ return out;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set_vel );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+} // namespace Population
+
+namespace Vector
+{
+// LCOV_EXCL_START
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} * u_out )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ vec.set( blockIdx, threadIdx );
+ u_out += offset;
+ if (vec.isValidPosition()) {
+ {% for i in range(D) -%}
+ u_out[{{i}}u] = vec.get({{i}}u);
+ {% endfor %}
+ }
+ }
+
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const * RESTRICT u_in )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ vec.set( blockIdx, threadIdx );
+ u_in += offset;
+ if (vec.isValidPosition()) {
+ {% for i in range(D) -%}
+ vec.get({{i}}u) = u_in[{{i}}u];
+ {% endfor %}
+ }
+ }
+
+ __global__ void kernel_broadcast(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const * RESTRICT u_in )
+ {
+ vec.set( blockIdx, threadIdx );
+ if (vec.isValidPosition()) {
+ {% for i in range(D) -%}
+ vec.get({{i}}u) = u_in[{{i}}u];
+ {% endfor %}
+ }
+ }
+
+ __global__ void kernel_add(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const * RESTRICT u_in )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ vec.set( blockIdx, threadIdx );
+ u_in += offset;
+ if (vec.isValidPosition()) {
+ {% for i in range(D) -%}
+ vec.get({{i}}u) += u_in[{{i}}u];
+ {% endfor %}
+ }
+ }
+
+ __global__ void kernel_broadcast_add(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const * RESTRICT u_in )
+ {
+ vec.set( blockIdx, threadIdx );
+ if (vec.isValidPosition()) {
+ {% for i in range(D) -%}
+ vec.get({{i}}u) += u_in[{{i}}u];
+ {% endfor %}
+ }
+ }
+// LCOV_EXCL_STOP
+
+ Vector{{D}}< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * vec_field,
+ Cell const & cell)
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data({{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ Vector{{D}}< {{dtype}} > vec;
+ thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
+ return vec;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * vec_field,
+ Vector{{D}}< {{dtype}} > const & vec,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void add(
+ gpu::GPUField< {{dtype}} > * vec_field,
+ Vector{{D}}< {{dtype}} > const & vec,
+ Cell const &cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_add );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void initialize(
+ gpu::GPUField< {{dtype}} > * vec_field,
+ Vector{{D}}< {{dtype}} > const & vec )
+ {
+ CellInterval ci = vec_field->xyzSizeWithGhostLayer();
+ thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_broadcast );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void add_to_all(
+ gpu::GPUField< {{dtype}} > * vec_field,
+ Vector{{D}}< {{dtype}} > const & vec )
+ {
+ CellInterval ci = vec_field->xyzSizeWithGhostLayer();
+ thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_broadcast_add );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ std::vector< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * vec_field,
+ CellInterval const & ci)
+ {
+ thrust::device_vector< {{dtype}} > dev_data(ci.numCells() * {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::vector< {{dtype}} > out(ci.numCells() * {{D}}u);
+ thrust::copy(dev_data.begin(), dev_data.end(), out.data());
+ return out;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * vec_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+} // namespace Vector
+
+namespace Interpolation
+{
+// LCOV_EXCL_START
+ /** @brief Calculate interpolation weights. */
+ static __forceinline__ __device__ void calculate_weights(
+ {{dtype}} const *RESTRICT const pos,
+ int *RESTRICT const corner,
+ {{dtype}} *RESTRICT const weights,
+ uint gl)
+ {
+ #pragma unroll
+ for (int dim = 0; dim < {{D}}; ++dim) {
+ auto const fractional_index = pos[dim] - {{dtype}}{0.5};
+ auto const nmp = floorf(fractional_index);
+ auto const distance = fractional_index - nmp - {{dtype}}{0.5};
+ corner[dim] = __{{dtype}}2int_rn(nmp) + static_cast<int>(gl);
+ weights[dim * 2 + 0] = {{dtype}}{0.5} - distance;
+ weights[dim * 2 + 1] = {{dtype}}{0.5} + distance;
+ }
+ }
+
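
The weight computation above is a per-axis linear interpolation between the two nearest cell midpoints; a plain-Python model (`gl` is the ghost-layer count):

```python
import math

def calculate_weights(pos, gl):
    # mirrors the device helper above
    corner, weights = [], []
    for x in pos:
        fractional_index = x - 0.5
        nmp = math.floor(fractional_index)
        distance = fractional_index - nmp - 0.5
        corner.append(int(nmp) + gl)
        weights.append((0.5 - distance, 0.5 + distance))
    return corner, weights

print(calculate_weights((1.75, 2.5, 0.5), gl=1))
# ([2, 3, 1], [(0.75, 0.25), (1.0, 0.0), (1.0, 0.0)])
```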
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const *RESTRICT const pos,
+ {{dtype}} *RESTRICT const vel,
+ uint n_pos,
+ uint gl)
+ {
+
+ uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
+ blockDim.x * blockIdx.x + threadIdx.x;
+
+ vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
+ if (vec.isValidPosition() and pos_index < n_pos) {
+ auto const array_offset = pos_index * uint({{D}}u);
+ int corner[{{D}}];
+ {{dtype}} weights[{{D}}][2];
+ calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
+ #pragma unroll
+ for (int i = 0; i < 2; i++) {
+ auto const cx = corner[0] + i;
+ auto const wx = weights[0][i];
+ #pragma unroll
+ for (int j = 0; j < 2; j++) {
+ auto const cy = corner[1] + j;
+ auto const wxy = wx * weights[1][j];
+ #pragma unroll
+ for (int k = 0; k < 2; k++) {
+ auto const cz = corner[2] + k;
+ auto const weight = wxy * weights[2][k];
+ {% for cf in range(D) -%}
+ vel[array_offset + {{cf}}u] += weight * vec.getNeighbor(cx, cy, cz, {{cf}}u);
+ {% endfor %}
+ }
+ }
+ }
+ }
+ }
+
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > vec,
+ {{dtype}} const *RESTRICT const pos,
+ {{dtype}} const *RESTRICT const forces,
+ uint n_pos,
+ uint gl )
+ {
+
+ uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
+ blockDim.x * blockIdx.x + threadIdx.x;
+
+ vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
+ if (vec.isValidPosition() and pos_index < n_pos) {
+ auto const array_offset = pos_index * uint({{D}}u);
+ int corner[{{D}}];
+ {{dtype}} weights[{{D}}][2];
+ calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
+ #pragma unroll
+ for (int i = 0; i < 2; i++) {
+ auto const cx = corner[0] + i;
+ auto const wx = weights[0][i];
+ #pragma unroll
+ for (int j = 0; j < 2; j++) {
+ auto const cy = corner[1] + j;
+ auto const wxy = wx * weights[1][j];
+ #pragma unroll
+ for (int k = 0; k < 2; k++) {
+ auto const cz = corner[2] + k;
+ auto const weight = wxy * weights[2][k];
+ {% for cf in range(D) -%}
+ atomicAdd(&vec.getNeighbor(cx, cy, cz, {{cf}}u),
+ weight * forces[array_offset + {{cf}}u]);
+ {% endfor %}
+ }
+ }
+ }
+ }
+ }
+// LCOV_EXCL_STOP
+
+ static dim3 calculate_dim_grid(uint const threads_x,
+ uint const blocks_per_grid_y,
+ uint const threads_per_block) {
+ assert(threads_x >= 1u);
+ assert(blocks_per_grid_y >= 1u);
+ assert(threads_per_block >= 1u);
+ auto const threads_y = threads_per_block * blocks_per_grid_y;
+ auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y;
+ return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1);
+ }
+
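
`calculate_dim_grid()` is a ceiling division of the thread count by the per-grid-row capacity; a plain-Python model:

```python
def calculate_dim_grid(threads_x, blocks_per_grid_y, threads_per_block):
    # mirrors the host helper above
    assert threads_x >= 1 and blocks_per_grid_y >= 1 and threads_per_block >= 1
    threads_y = threads_per_block * blocks_per_grid_y
    blocks_per_grid_x = (threads_x + threads_y - 1) // threads_y
    return (blocks_per_grid_x, blocks_per_grid_y, 1)

print(calculate_dim_grid(1000, 4, 64))  # (4, 4, 1) covers 4 * 4 * 64 = 1024 threads
```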
+ std::vector< {{dtype}} >
+ get(
+ gpu::GPUField< {{dtype}} > const *vec_field,
+ std::vector< {{dtype}} > const &pos,
+ uint gl )
+ {
+ thrust::device_vector< {{dtype}} > dev_pos(pos.begin(), pos.end());
+ thrust::device_vector< {{dtype}} > dev_vel(pos.size());
+ auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
+ auto const dev_vel_ptr = thrust::raw_pointer_cast(dev_vel.data());
+
+ auto const threads_per_block = uint(64u);
+ auto const n_pos = static_cast<uint>(pos.size() / {{D}}ul);
+ auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
+ kernel_get<<<dim_grid, threads_per_block, 0u, nullptr>>>(
+ gpu::FieldIndexing< {{dtype}} >::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
+ dev_pos_ptr, dev_vel_ptr, n_pos, gl);
+
+ std::vector< {{dtype}} > out(pos.size());
+ thrust::copy(dev_vel.begin(), dev_vel.end(), out.data());
+ return out;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > const *vec_field,
+ std::vector< {{dtype}} > const &pos,
+ std::vector< {{dtype}} > const &forces,
+ uint gl )
+ {
+ thrust::device_vector< {{dtype}} > dev_pos(pos.begin(), pos.end());
+ thrust::device_vector< {{dtype}} > dev_for(forces.begin(), forces.end());
+ auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
+ auto const dev_for_ptr = thrust::raw_pointer_cast(dev_for.data());
+
+ auto const threads_per_block = uint(64u);
+ auto const n_pos = static_cast<uint>(pos.size() / {{D}}ul);
+ auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
+ kernel_set<<<dim_grid, threads_per_block, 0u, nullptr>>>(
+ gpu::FieldIndexing< {{dtype}} >::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
+ dev_pos_ptr, dev_for_ptr, n_pos, gl);
+ }
+} // namespace Interpolation
+
+namespace Equilibrium
+{
+// LCOV_EXCL_START
+ __device__ void kernel_set_device(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} const * RESTRICT const u,
+ {{dtype}} rho )
+ {
+ {%if not compressible %}
+ rho -= {{dtype}}(1.0);
+ {%endif %}
+
+ {% for eqTerm in equilibrium -%}
+ pdf.get({{loop.index0 }}u) = {{eqTerm}};
+ {% endfor -%}
+ }
+// LCOV_EXCL_STOP
+} // namespace Equilibrium
+
+namespace Density
+{
+// LCOV_EXCL_START
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} * RESTRICT rho_out )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
+ pdf.set( blockIdx, threadIdx );
+ rho_out += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{density_getters | indent(12)}}
+ rho_out[0u] = rho;
+ }
+ }
+
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} const * RESTRICT rho_in )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
+ pdf.set( blockIdx, threadIdx );
+ rho_in += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{unshifted_momentum_density_getter | indent(12)}}
+
+ // calculate current velocity (before density change)
+ {{dtype}} const rho_inv = {{dtype}} {1} / rho;
+ {{dtype}} const u_old[{{D}}] = { {% for i in range(D) %}momdensity_{{i}} * rho_inv{% if not loop.last %}, {% endif %}{% endfor %} };
+
+ Equilibrium::kernel_set_device(pdf, u_old, rho_in[0u] {%if not compressible %} + {{dtype}} {1} {%endif%});
+ }
+ }
+// LCOV_EXCL_STOP
+
+ {{dtype}} get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(1u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ {{dtype}} rho = dev_data[0u];
+ return rho;
+ }
+
+ std::vector< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(ci.numCells());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::vector< {{dtype}} > out(dev_data.size());
+ thrust::copy(dev_data.begin(), dev_data.end(), out.begin());
+ return out;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ const {{dtype}} rho,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(1u, rho);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( const_cast<const {{dtype}} *>(dev_data_ptr) );
+ kernel();
+ }
+} // namespace Density
+
+namespace Velocity
+{
+// LCOV_EXCL_START
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ gpu::FieldAccessor< {{dtype}} > force,
+ {{dtype}} * RESTRICT u_out )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ pdf.set( blockIdx, threadIdx );
+ force.set( blockIdx, threadIdx );
+ u_out += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cu | indent(8) }}
+ auto const rho_inv = {{dtype}} {1} / rho;
+ {% for i in range(D) -%}
+ u_out[{{i}}u] = md_{{i}} * rho_inv;
+ {% endfor %}
+ }
+ }
+
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ gpu::FieldAccessor< {{dtype}} > velocity,
+ gpu::FieldAccessor< {{dtype}} > force,
+ {{dtype}} const * RESTRICT u_in )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ pdf.set( blockIdx, threadIdx );
+ velocity.set( blockIdx, threadIdx );
+ force.set( blockIdx, threadIdx );
+ u_in += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{dtype}} const * RESTRICT const u = u_in;
+ {{density_getters | indent(8)}}
+ {{density_velocity_setter_macroscopic_values | substitute_force_getter_cu | indent(8)}}
+ {% for i in range(D) -%}
+ velocity.get({{i}}u) = u_in[{{i}}u];
+ {% endfor %}
+ {{dtype}} u_new[{{D}}] = { {% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %} };
+
+ Equilibrium::kernel_set_device(pdf, u_new, rho {%if not compressible %} + {{dtype}}(1) {%endif%});
+ }
+ }
+// LCOV_EXCL_STOP
+
+ Vector{{D}}< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data({{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ Vector{{D}}< {{dtype}} > vec;
+ thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
+ return vec;
+ }
+
+ std::vector< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data({{D}}u * ci.numCells());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::vector< {{dtype}} > out(dev_data.size());
+ thrust::copy(dev_data.begin(), dev_data.end(), out.data());
+ return out;
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Vector{{D}}< {{dtype}} > const & u,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(u.data(), u.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<{{dtype}} const *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void set(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<{{dtype}} const *>(dev_data_ptr) );
+ kernel();
+ }
+} // namespace Velocity
+
+namespace Force {
+// LCOV_EXCL_START
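+ // The cached velocity depends on the applied force through the generated
+ // momentum_density_getter, so it is refreshed whenever the force changes.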
+ __global__ void kernel_set(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ gpu::FieldAccessor< {{dtype}} > velocity,
+ gpu::FieldAccessor< {{dtype}} > force,
+ {{dtype}} const * RESTRICT f_in )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ pdf.set( blockIdx, threadIdx );
+ velocity.set( blockIdx, threadIdx );
+ force.set( blockIdx, threadIdx );
+ f_in += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+
+ {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "f_in[\g<1>u]") | indent(8) }}
+ auto const rho_inv = {{dtype}} {1} / rho;
+
+ {% for i in range(D) -%}
+ force.get({{i}}u) = f_in[{{i}}u];
+ {% endfor %}
+
+ {% for i in range(D) -%}
+ velocity.get({{i}}u) = md_{{i}} * rho_inv;
+ {% endfor %}
+ }
+ }
+// LCOV_EXCL_STOP
+
+ void
+ set( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > * force_field,
+ Vector{{D}}< {{dtype}} > const & u,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data(u.data(), u.data() + {{D}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<{{dtype}} const *>(dev_data_ptr) );
+ kernel();
+ }
+
+ void
+ set( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_set );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( const_cast<{{dtype}} const *>(dev_data_ptr) );
+ kernel();
+ }
+} // namespace Force
+
+namespace MomentumDensity
+{
+// LCOV_EXCL_START
+ __global__ void kernel_sum(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ gpu::FieldAccessor< {{dtype}} > force,
+ {{dtype}} * RESTRICT out )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u);
+ pdf.set( blockIdx, threadIdx );
+ force.set( blockIdx, threadIdx );
+ out += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cu | indent(8) }}
+ {% for i in range(D) -%}
+ out[{{i}}u] += md_{{i}};
+ {% endfor %}
+ }
+ }
+// LCOV_EXCL_STOP
+
+ Vector{{D}}< {{dtype}} > reduce(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field )
+ {
+ thrust::device_vector< {{dtype}} > dev_data({{D}}u, {{dtype}} {0});
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
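+ // one kernel launch per cell: every launch accumulates into the same
+ // {{D}}-component device buffer, which is copied back to the host only once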
+ WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
+ Cell cell(x, y, z);
+ CellInterval ci ( cell, cell );
+ auto kernel = gpu::make_kernel( kernel_sum );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ });
+ Vector{{D}}< {{dtype}} > mom({{dtype}} {0});
+ thrust::copy(dev_data.begin(), dev_data.begin() + {{D}}u, mom.data());
+ return mom;
+ }
+} // namespace MomentumDensity
+
+namespace PressureTensor
+{
+// LCOV_EXCL_START
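+ // each thread writes the {{D**2}} pressure tensor components of its cell
+ // to a contiguous chunk of the output buffer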
+ __global__ void kernel_get(
+ gpu::FieldAccessor< {{dtype}} > pdf,
+ {{dtype}} * RESTRICT p_out )
+ {
+ auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D**2}}u);
+ pdf.set( blockIdx, threadIdx );
+ p_out += offset;
+ if (pdf.isValidPosition()) {
+ {% for i in range(Q) -%}
+ {{dtype}} const f_{{i}} = pdf.get({{i}}u);
+ {% endfor -%}
+ {{second_momentum_getter | indent(12) }}
+ {% for i in range(D**2) -%}
+ p_out[{{i}}u] = p_{{i}};
+ {% endfor %}
+ }
+ }
+// LCOV_EXCL_STOP
+
+ Matrix{{D}}< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell )
+ {
+ CellInterval ci ( cell, cell );
+ thrust::device_vector< {{dtype}} > dev_data({{D**2}}u);
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ Matrix{{D}}< {{dtype}} > out;
+ thrust::copy(dev_data.begin(), dev_data.end(), out.data());
+ return out;
+ }
+
+ std::vector< {{dtype}} > get(
+ gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci )
+ {
+ thrust::device_vector< {{dtype}} > dev_data({{D**2}}u * ci.numCells());
+ auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
+ auto kernel = gpu::make_kernel( kernel_get );
+ kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) );
+ kernel.addParam( dev_data_ptr );
+ kernel();
+ std::vector< {{dtype}} > out(dev_data.size());
+ thrust::copy(dev_data.begin(), dev_data.end(), out.data());
+ return out;
+ }
+} // namespace PressureTensor
+
+
+} // namespace accessor
+} // namespace {{namespace}}
+} // namespace walberla
diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh
new file mode 100644
index 00000000000..65f776abee3
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2023-2024 The ESPResSo project
+ * Copyright (C) 2020 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file
+ * Lattice field accessors.
+ * Adapted from the waLBerla source file
+ * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+ */
+
+#pragma once
+
+#include <core/DataTypes.h>
+#include <core/cell/Cell.h>
+#include <core/cell/CellInterval.h>
+#include <core/math/Matrix{{D}}.h>
+#include <core/math/Vector{{D}}.h>
+
+#include <gpu/GPUField.h>
+
+#include <array>
+#include <tuple>
+#include <vector>
+
+namespace walberla {
+namespace {{namespace}} {
+namespace accessor {
+
+namespace Population {
+ /** @brief Get populations from a single cell. */
+ std::array<{{dtype}}, {{Q}}u>
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell );
+ /** @brief Set populations on a single cell. */
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop,
+ Cell const & cell );
+ /** @brief Set populations and recalculate velocities on a single cell. */
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop,
+ Cell const & cell );
+ /** @brief Initialize all cells with the same value. */
+ void initialize(
+ gpu::GPUField< {{dtype}} > * pdf_field,
+ std::array< {{dtype}}, {{Q}}u > const & pop );
+ /** @brief Get populations from a cell interval. */
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci );
+ /** @brief Set populations on a cell interval. */
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+ /** @brief Set populations and recalculate velocities on a cell interval. */
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+} // namespace Population
+
+namespace Vector {
+ /** @brief Get value from a single cell. */
+ Vector{{D}}< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * field,
+ Cell const & cell );
+ /** @brief Set value on a single cell. */
+ void set( gpu::GPUField< {{dtype}} > * field,
+ Vector{{D}}< {{dtype}} > const & vec,
+ Cell const & cell );
+ /** @brief Add value to a single cell. */
+ void add( gpu::GPUField< {{dtype}} > * field,
+ Vector{{D}}< {{dtype}} > const & vec,
+ Cell const & cell );
+ /** @brief Initialize all cells with the same value. */
+ void initialize( gpu::GPUField< {{dtype}} > * field,
+ Vector{{D}}< {{dtype}} > const & vec);
+ /** @brief Add value to all cells. */
+ void add_to_all( gpu::GPUField< {{dtype}} > * field,
+ Vector{{D}}< {{dtype}} > const & vec);
+ /** @brief Get values from a cell interval. */
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * vec_field,
+ CellInterval const & ci);
+ /** @brief Set values on a cell interval. */
+ void
+ set( gpu::GPUField< {{dtype}} > * vec_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+
+} // namespace Vector
+
+namespace Interpolation {
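+ /** @brief Interpolate a vector field at the given positions. */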
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const *vec_field,
+ std::vector< {{dtype}} > const &pos,
+ uint gl );
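+ /** @brief Deposit values (e.g. coupling forces) at the given positions. */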
+ void
+ set( gpu::GPUField< {{dtype}} > const *vec_field,
+ std::vector< {{dtype}} > const &pos,
+ std::vector< {{dtype}} > const &forces,
+ uint gl );
+} // namespace Interpolation
+
+namespace Density {
+ {{dtype}}
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell );
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ {{dtype}} const rho,
+ Cell const & cell );
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci );
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+} // namespace Density
+
+namespace Velocity {
+ Vector{{D}}< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Cell const & cell );
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ CellInterval const & ci );
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Vector{{D}}< {{dtype}} > const & u,
+ Cell const & cell );
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+} // namespace Velocity
+
+namespace Force {
+ void
+ set( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > * force_field,
+ Vector{{D}}< {{dtype}} > const & u,
+ Cell const & cell );
+ void
+ set( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > * velocity_field,
+ gpu::GPUField< {{dtype}} > * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci );
+} // namespace Force
+
+namespace DensityAndVelocity {
+ std::tuple< {{dtype}} , Vector{{D}}< {{dtype}} > >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Cell const & cell );
+ void
+ set( gpu::GPUField< {{dtype}} > * pdf_field,
+ gpu::GPUField< {{dtype}} > * force_field,
+ Vector{{D}}< {{dtype}} > const & u,
+ {{dtype}} const rho,
+ Cell const & cell );
+} // namespace DensityAndVelocity
+
+namespace DensityAndMomentumDensity {
+ std::tuple< {{dtype}} , Vector{{D}}< {{dtype}} > >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field,
+ Cell const & cell );
+} // namespace DensityAndMomentumDensity
+
+namespace MomentumDensity {
+ Vector{{D}}< {{dtype}} >
+ reduce( gpu::GPUField< {{dtype}} > const * pdf_field,
+ gpu::GPUField< {{dtype}} > const * force_field );
+} // namespace MomentumDensity
+
+namespace PressureTensor {
+ Matrix{{D}}< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ Cell const & cell );
+ std::vector< {{dtype}} >
+ get( gpu::GPUField< {{dtype}} > const * pdf_field,
+ CellInterval const & ci );
+} // namespace PressureTensor
+
+} // namespace accessor
+} // namespace {{namespace}}
+} // namespace walberla
diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
index 37e1edcf9cd..d443243bbab 100644
--- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
+++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h
@@ -37,19 +37,18 @@
#include
#include
+#include <iterator>
#include
#include
#ifdef WALBERLA_CXX_COMPILER_IS_GNU
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
-#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
namespace walberla {
@@ -58,7 +57,7 @@ namespace accessor {
namespace Population
{
- inline std::array<{{dtype}}, {{Q}}u>
+ inline auto
get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
Cell const & cell )
{
@@ -82,8 +81,30 @@ namespace Population
}
inline void
- broadcast( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
- std::array<{{dtype}}, {{Q}}u> const & pop)
+ set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+ std::array<{{dtype}}, {{Q}}u> const & pop,
+ Cell const & cell )
+ {
+ auto & xyz0 = pdf_field->get(cell, uint_t{ 0u });
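+ // chained assignment: write the new population to the field and keep a
+ // copy in f_{{i}} for the velocity update below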
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u];
+ {% endfor -%}
+
+ {% for c in "xyz" -%}
+ const auto {{c}} = cell.{{c}}();
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }}
+ const auto rho_inv = {{dtype}} {1} / rho;
+ {% for i in range(D) -%}
+ velocity_field->get(cell, uint_t{ {{i}}u }) = md_{{i}} * rho_inv;
+ {% endfor -%}
+ }
+
+ inline void
+ initialize( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+ std::array<{{dtype}}, {{Q}}u> const & pop)
{
WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, {
{{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
@@ -93,7 +114,7 @@ namespace Population
});
}
- inline std::vector< {{dtype}} >
+ inline auto
get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
CellInterval const & ci )
{
@@ -118,15 +139,42 @@ namespace Population
CellInterval const & ci )
{
assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u));
- auto values_ptr = values.data();
+ auto pop = values.data();
+ for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+ for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+ for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+ {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u];
+ {% endfor -%}
+ std::advance(pop, {{Q}});
+ }
+ }
+ }
+ }
+
+ inline void
+ set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u));
+ auto pop = values.data();
for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
{{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
{% for i in range(Q) -%}
- pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u];
+ const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u];
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cpp | indent(12) }}
+ const auto rho_inv = {{dtype}} {1} / rho;
+ {% for i in range(D) -%}
+ velocity_field->get(x, y, z, uint_t{ {{i}}u }) = md_{{i}} * rho_inv;
{% endfor -%}
- values_ptr += {{Q}}u;
+ std::advance(pop, {{Q}});
}
}
}
@@ -135,7 +183,7 @@ namespace Population
namespace Vector
{
- inline Vector{{D}}< {{dtype}} >
+ inline auto
get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field,
Cell const & cell )
{
@@ -170,8 +218,8 @@ namespace Vector
}
inline void
- broadcast( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
- Vector{{D}}< {{dtype}} > const & vec)
+ initialize( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * vec_field,
+ Vector{{D}}< {{dtype}} > const & vec)
{
WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(vec_field, {
{{dtype}} & xyz0 = vec_field->get(x, y, z, uint_t{ 0u });
@@ -193,7 +241,7 @@ namespace Vector
});
}
- inline std::vector< {{dtype}} >
+ inline auto
get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field,
CellInterval const & ci )
{
@@ -226,7 +274,7 @@ namespace Vector
{% for i in range(D) -%}
vec_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u];
{% endfor -%}
- values_ptr += {{D}}u;
+ std::advance(values_ptr, {{D}});
}
}
}
@@ -237,11 +285,11 @@ namespace EquilibriumDistribution
{
inline {{dtype}}
get( stencil::Direction const direction,
- Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}}(0.0) ),
- {{dtype}} rho = {{dtype}}(1.0) )
+ Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}} {0} ),
+ {{dtype}} rho = {{dtype}} {1} )
{
{% if not compressible %}
- rho -= {{dtype}}(1.0);
+ rho -= {{dtype}} {1};
{% endif %}
{{equilibrium_from_direction}}
}
@@ -256,7 +304,7 @@ namespace Equilibrium
Cell const & cell )
{
{%if not compressible %}
- rho -= {{dtype}}(1.0);
+ rho -= {{dtype}} {1};
{%endif %}
{{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
@@ -293,13 +341,13 @@ namespace Density
{{unshifted_momentum_density_getter | indent(8)}}
// calculate current velocity (before density change)
- const {{dtype}} conversion = {{dtype}}(1) / rho;
+ const {{dtype}} conversion = {{dtype}} {1} / rho;
Vector{{D}}< {{dtype}} > velocity;
{% for i in range(D) -%}
velocity[{{i}}u] = momdensity_{{i}} * conversion;
{% endfor %}
- Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}}(1) {%endif%}, cell);
+ Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}} {1} {%endif%}, cell);
}
inline std::vector< {{dtype}} >
@@ -341,13 +389,13 @@ namespace Density
{{unshifted_momentum_density_getter | indent(12)}}
// calculate current velocity (before density change)
- const {{dtype}} conversion = {{dtype}}(1) / rho;
+ const {{dtype}} conversion = {{dtype}} {1} / rho;
Vector{{D}}< {{dtype}} > velocity;
{% for i in range(D) -%}
velocity[{{i}}u] = momdensity_{{i}} * conversion;
{% endfor %}
- Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}}(1) {%endif%}, Cell{x, y, z});
+ Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}} {1} {%endif%}, Cell{x, y, z});
++values_it;
}
}
@@ -357,8 +405,53 @@ namespace Density
namespace Velocity
{
+ inline auto
+ get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+ Cell const & cell )
+ {
+ const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+
+ {% for c in "xyz" -%}
+ const auto {{c}} = cell.{{c}}();
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }}
+ const {{dtype}} rho_inv = {{dtype}} {1} / rho;
+
+ return Vector3<{{dtype}}>(md_0 * rho_inv, md_1 * rho_inv, md_2 * rho_inv);
+ }
+
+ inline auto
+ get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+ CellInterval const & ci )
+ {
+ std::vector< {{dtype}} > out;
+ out.reserve(ci.numCells() * uint_t({{D}}u));
+ for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+ for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+ for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+ const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+ {{momentum_density_getter | substitute_force_getter_cpp | indent(12) }}
+ const {{dtype}} rho_inv = {{dtype}} {1} / rho;
+ {% for i in range(D) -%}
+ out.emplace_back(md_{{i}} * rho_inv);
+ {% endfor -%}
+ }
+ }
+ }
+ return out;
+ }
+
inline void
set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
Vector{{D}}< {{dtype}} > const & u,
Cell const & cell )
@@ -373,14 +466,113 @@ namespace Velocity
const auto {{c}} = cell.{{c}}();
{% endfor -%}
{{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}}
+ {% for i in range(D) -%}
+ velocity_field->get(x, y, z, uint_t{ {{i}}u }) = u[{{i}}u];
+ {% endfor %}
+
+ Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}} {1} {%endif%}, cell);
+ }
+
+ inline void
+ set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u));
+ auto u = values.data();
+ for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+ for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+ for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+ {{dtype}} & pdf_xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+ {{dtype}} & vel_xyz0 = velocity_field->get(x, y, z, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+ {{density_getters | indent(8)}}
+
+ {{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}}
+ {% for i in range(D) -%}
+ velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = u[{{i}}u];
+ {% endfor %}
+ std::advance(u, {{D}});
- Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}}(1) {%endif%}, cell);
+ Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}} {1} {%endif%}, Cell{x, y, z});
+ }
+ }
+ }
}
} // namespace Velocity
+namespace Force
+{
+ inline void
+ set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * force_field,
+ Vector{{D}}< {{dtype}} > const & force,
+ Cell const & cell )
+ {
+ {{dtype}} const & pdf_xyz0 = pdf_field->get(cell, uint_t{ 0u });
+ {{dtype}} & vel_xyz0 = velocity_field->get(cell, uint_t{ 0u });
+ {{dtype}} & laf_xyz0 = force_field->get(cell, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+
+ {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "force[\g<1>u]") | indent(8) }}
+ auto const rho_inv = {{dtype}} {1} / rho;
+
+ {% for i in range(D) -%}
+ force_field->getF( &laf_xyz0, uint_t{ {{i}}u }) = force[{{i}}u];
+ {% endfor %}
+
+ {% for i in range(D) -%}
+ velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = md_{{i}} * rho_inv;
+ {% endfor %}
+ }
+
+ inline void
+ set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field,
+ GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * force_field,
+ std::vector< {{dtype}} > const & values,
+ CellInterval const & ci )
+ {
+ assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u));
+ auto force = values.data();
+ for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+ for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+ for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+ {{dtype}} const & pdf_xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+ {{dtype}} & vel_xyz0 = velocity_field->get(x, y, z, uint_t{ 0u });
+ {{dtype}} & laf_xyz0 = force_field->get(x, y, z, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+
+ {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "force[\g<1>u]") | indent(12) }}
+ auto const rho_inv = {{dtype}} {1} / rho;
+
+ {% for i in range(D) -%}
+ force_field->getF( &laf_xyz0, uint_t{ {{i}}u }) = force[{{i}}u];
+ {% endfor %}
+
+ {% for i in range(D) -%}
+ velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = md_{{i}} * rho_inv;
+ {% endfor %}
+
+ std::advance(force, {{D}});
+ }
+ }
+ }
+ }
+} // namespace Force
+
namespace MomentumDensity
{
- inline Vector{{D}}< {{dtype}} >
+ inline auto
reduce( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field )
{
@@ -403,7 +595,7 @@ namespace MomentumDensity
namespace PressureTensor
{
- inline Matrix{{D}}< {{dtype}} >
+ inline auto
get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
Cell const & cell )
{
@@ -422,6 +614,33 @@ namespace PressureTensor
{% endfor %}
return pressureTensor;
}
+
+ inline auto
+ get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field,
+ CellInterval const & ci )
+ {
+ std::vector< {{dtype}} > out;
+ out.reserve(ci.numCells() * uint_t({{D**2}}u));
+ for (auto x = ci.xMin(); x <= ci.xMax(); ++x) {
+ for (auto y = ci.yMin(); y <= ci.yMax(); ++y) {
+ for (auto z = ci.zMin(); z <= ci.zMax(); ++z) {
+ const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u });
+ {% for i in range(Q) -%}
+ const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u });
+ {% endfor -%}
+
+ {{second_momentum_getter | indent(12) }}
+
+ {% for i in range(D) -%}
+ {% for j in range(D) -%}
+ out.emplace_back(p_{{i*D+j}});
+ {% endfor %}
+ {% endfor %}
+ }
+ }
+ }
+ return out;
+ }
} // namespace PressureTensor
} // namespace accessor
diff --git a/maintainer/walberla_kernels/walberla_lbm_generation.py b/maintainer/walberla_kernels/walberla_lbm_generation.py
index 72f5ffdfec4..6aec095662d 100644
--- a/maintainer/walberla_kernels/walberla_lbm_generation.py
+++ b/maintainer/walberla_kernels/walberla_lbm_generation.py
@@ -19,6 +19,7 @@
#
import os
+import re
import sympy as sp
import pystencils as ps
import lbmpy_walberla
@@ -104,14 +105,29 @@ def equations_to_code(equations, variable_prefix="",
return "\n".join(result)
+def substitute_force_getter_pattern(code, pattern, subst):
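+ # generic helper used by the Jinja templates; e.g. FieldAccessors.tmpl.cu
+ # calls it with pattern "force->get\(x, ?y, ?z, ?([0-9])u?\)" and
+ # substitution "f_in[\g<1>u]" to redirect force reads to a kernel argument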
+ re_pat = re.compile(pattern)
+ assert re_pat.search(code) is not None, f"pattern '{pattern}' not found in '''\n{code}\n'''" # nopep8
+ return re_pat.sub(subst, code)
+
+
def substitute_force_getter_cpp(code):
field_getter = "force->"
- assert field_getter in code is not None, f"pattern '{field_getter}' not found in '''\n{code}\n'''"
+ assert field_getter in code is not None, f"pattern '{field_getter}' not found in '''\n{code}\n'''" # nopep8
return code.replace(field_getter, "force_field->")
+def substitute_force_getter_cu(code):
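+ # rewrites the CPU-style accessor "force->get(x,y,z,i)" emitted by
+ # pystencils into the gpu::FieldAccessor call "force.get(i)"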
+ field_getter = "force->get(x,y,z,"
+ assert field_getter in code is not None, \
+ f"pattern '{field_getter} not found in '''\n{code}\n'''"
+ return code.replace(field_getter, "force.get(")
+
+
def add_espresso_filters_to_jinja_env(jinja_env):
jinja_env.filters["substitute_force_getter_cpp"] = substitute_force_getter_cpp
+ jinja_env.filters["substitute_force_getter_cu"] = substitute_force_getter_cu
+ jinja_env.filters["substitute_force_getter_pattern"] = substitute_force_getter_pattern
def generate_macroscopic_values_accessors(ctx, config, lb_method, templates):
diff --git a/requirements.txt b/requirements.txt
index ea77f9d6c09..2465a35b426 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,43 +1,48 @@
# build system
-cython>=0.29.21,<=3.0.7
-setuptools>=59.6.0
+cython>=0.29.21,<3.0.10
+setuptools>=68.1.2
+packaging>=24.0
# required scientific packages
-numpy>=1.23
-h5py>=3.6.0
+numpy>=1.26.4,<2.0
+h5py>=3.10.0
# optional scientific packages
-scipy>=1.8.0
-pint>=0.18
+scipy>=1.11.4
+pandas>=1.3.5
+pint>=0.19.2
+ase>=3.22.1
# optional packages for graphics and external devices
-matplotlib>=3.5.1
+matplotlib>=3.6.3
vtk>=9.1.0
PyOpenGL>=3.1.5
pygame>=2.1.2
# waLBerla dependencies
-pystencils==1.2
-lbmpy==1.2
+pystencils==1.3.3
+lbmpy==1.3.3
sympy==1.9
islpy==2022.2.1
-jinja2>=3.0.3
+jinja2>=3.1.2
# CI-related
-requests>=2.25.1
-lxml>=4.8.0
-coverage>=6.2
+requests>=2.32.0
+lxml>=5.1.0
+coverage>=7.4.4
# sphinx and its dependencies
-sphinx>=4.3.2
-sphinx-toggleprompt==0.4.0
-sphinxcontrib-bibtex>=2.6.1
-numpydoc>=1.5.0
-pybtex>=0.23
+sphinx>=7.2.6
+sphinx-toggleprompt==0.5.2
+sphinxcontrib-bibtex>=2.6.2
+numpydoc>=1.6.0
+pybtex>=0.24.0
# jupyter dependencies
-jupyterlab>=4.0.8
-nbformat==5.1.3
-nbconvert==6.5.1
-tqdm>=4.57.0
+jupyterlab>=3.5
+ipykernel>=6.29.3
+nbformat==5.9.1
+nbconvert==6.5.3
+tqdm>=4.66.2
# linters and their dependencies
-autopep8==1.6.0
-pycodestyle==2.8.0
-pylint>=2.12.2
-astroid>=2.9.3
+pep8==1.7.1
+autopep8==2.1.0
+pycodestyle==2.11.1
+pylint>=3.0.3
+astroid>=3.0.2
isort>=5.6.4
-pre-commit>=2.17.0
+pre-commit>=3.6.2
cmakelang==0.6.13
diff --git a/samples/h5md_trajectory.py b/samples/h5md_trajectory.py
index 6d5f7b4a846..8cf8ba44ca5 100644
--- a/samples/h5md_trajectory.py
+++ b/samples/h5md_trajectory.py
@@ -66,7 +66,8 @@
xyz_folded.append(system.part.all().pos_folded[:])
xyz_unfolded.append(system.part.all().pos[:])
# resize box (simulates NpT)
-system.box_l = system.box_l + 1.
+for i in range(3):
+ system.change_volume_and_rescale_particles(system.box_l[i] + 1., "xyz"[i])
system.integrator.run(10)
h5.write()
xyz_folded.append(system.part.all().pos_folded[:])
diff --git a/samples/lbf.py b/samples/lbf.py
index b418df1a960..905b219f831 100644
--- a/samples/lbf.py
+++ b/samples/lbf.py
@@ -60,6 +60,7 @@
lb_params = {'agrid': 1, 'density': 1, 'kinematic_viscosity': 1, 'tau': 0.01,
+ 'single_precision': False,
'ext_force_density': [0, 0, -1.0 / (box_l**3)]}
if args.gpu:
diff --git a/samples/reaction_ensemble_complex_reaction.py b/samples/reaction_ensemble_complex_reaction.py
index 93c2c2805dd..ab4974d0df0 100644
--- a/samples/reaction_ensemble_complex_reaction.py
+++ b/samples/reaction_ensemble_complex_reaction.py
@@ -148,7 +148,9 @@ def equations(variables):
print("concentrations sampled with the reaction ensemble vs. analytical solutions:")
for ptype in types:
- print(f" type {types_name[ptype]}: {concentrations[ptype]:.4f} +/- {concentrations_95ci[ptype]:.4f} mol/l (95% CI), expected: {concentrations_numerical[ptype]:.4f} mol/l")
+ print(f" type {types_name[ptype]}: {concentrations[ptype]:.4f} "
+ f"+/- {concentrations_95ci[ptype]:.4f} mol/l (95% CI), "
+ f"expected: {concentrations_numerical[ptype]:.4f} mol/l")
K_sim = ((concentrations[type_C] / c_ref_in_mol_per_l)**nu_C
* (concentrations[type_D] / c_ref_in_mol_per_l)**nu_D
diff --git a/samples/slice_input.py b/samples/slice_input.py
index 1feb8f0fe9a..255b792e08b 100644
--- a/samples/slice_input.py
+++ b/samples/slice_input.py
@@ -60,30 +60,30 @@
partcls = system.part.add(id=id_list, pos=pos_list, type=type_list)
p0p1 = system.part.by_ids([0, 1])
-print("TYPE\n%s" % partcls.type)
+print(f"TYPE\n{partcls.type}")
p0p1.type = [3, 3]
-print("TYPE_NEW\n%s" % partcls.type)
+print(f"TYPE_NEW\n{partcls.type}")
-print("POS\n%s" % partcls.pos)
+print(f"POS\n%s" % partcls.pos)
system.part.by_ids(range(5)).pos = [[1, 1, 1], [2, 2, 2], [
3, 3, 3], [4, 4, 4], [5, 5, 5]]
-print("POS_NEW\n%s" % partcls.pos)
+print(f"POS_NEW\n{partcls.pos}")
-print("V\n%s" % partcls.v)
+print(f"V\n%s" % partcls.v)
p0p1.v = [[1, 2, 3], [2, 3, 4]]
-print("V_NEW\n%s" % partcls.v)
+print(f"V_NEW\n{partcls.v}")
-print("F\n%s" % partcls.f)
+print(f"F\n{partcls.f}")
p0p1.f = [[3, 4, 5], [4, 5, 6]]
-print("F_NEW\n%s" % partcls.f)
+print(f"F_NEW\n{partcls.f}")
if espressomd.has_features(["MASS"]):
- print("MASS\n%s" % partcls.mass)
+ print(f"MASS\n{partcls.mass}")
p0p1.mass = [2, 3]
- print("MASS_NEW\n%s" % partcls.mass)
+ print(f"MASS_NEW\n{partcls.mass}")
if espressomd.has_features(["ELECTROSTATICS"]):
- print("Q\n%s" % partcls.q)
+ print(f"Q\n{partcls.q}")
system.part.by_ids(range(0, n_part, 2)).q = np.ones(n_part // 2)
system.part.by_ids(range(1, n_part, 2)).q = -np.ones(n_part // 2)
- print("Q_NEW\n%s" % partcls.q)
+ print(f"Q_NEW\n{partcls.q}")
diff --git a/samples/visualization_cellsystem.py b/samples/visualization_cellsystem.py
index 19f4638ddd1..3f38c6f0065 100644
--- a/samples/visualization_cellsystem.py
+++ b/samples/visualization_cellsystem.py
@@ -47,7 +47,7 @@
system.time_step = 0.0005
system.cell_system.set_regular_decomposition(use_verlet_lists=True)
system.cell_system.skin = 0.4
-#system.cell_system.node_grid = [i, j, k]
+# system.cell_system.node_grid = [i, j, k]
for i in range(100):
system.part.add(pos=box * np.random.random(3))
diff --git a/samples/visualization_ljliquid.py b/samples/visualization_ljliquid.py
index 9934ddc203c..8722ebd6ac0 100644
--- a/samples/visualization_ljliquid.py
+++ b/samples/visualization_ljliquid.py
@@ -93,7 +93,7 @@
f"Simulate {n_part} particles in a cubic box {box_l} at density {density}.")
print("Interactions:\n")
act_min_dist = system.analysis.min_dist()
-print(f"Start with minimal distance {act_min_dist}")
+print(f"Start with minimal distance {act_min_dist:.3f}")
visualizer = espressomd.visualization.openGLLive(system)
@@ -127,7 +127,7 @@
#############################################################
# Integration #
#############################################################
-print("\nStart integration: run %d times %d steps" % (int_n_times, int_steps))
+print(f"\nStart integration: run {int_n_times} times {int_steps} steps")
# print initial energies
energies = system.analysis.energy()
diff --git a/src/config/features.def b/src/config/features.def
index 59a2583e7ba..ff8eb2a0413 100644
--- a/src/config/features.def
+++ b/src/config/features.def
@@ -54,7 +54,6 @@ ROTATIONAL_INERTIA implies ROTATION
/* Electrostatics */
ELECTROSTATICS
P3M equals ELECTROSTATICS and FFTW
-MMM1D_GPU requires CUDA and ELECTROSTATICS
MMM1D_MACHINE_PREC requires ELECTROSTATICS
/* Magnetostatics */
@@ -63,8 +62,6 @@ DP3M equals DIPOLES and FFTW
DIPOLAR_DIRECT_SUM requires CUDA
DIPOLAR_DIRECT_SUM equals DIPOLES and ROTATION and CUDA
DIPOLE_FIELD_TRACKING implies DIPOLES
-DIPOLAR_BARNES_HUT requires CUDA
-DIPOLAR_BARNES_HUT equals DIPOLES and ROTATION and CUDA
/* Virtual sites features */
VIRTUAL_SITES
diff --git a/src/config/include/config/config.hpp b/src/config/include/config/config.hpp
index f392f9fff80..a3a8d14ea9f 100644
--- a/src/config/include/config/config.hpp
+++ b/src/config/include/config/config.hpp
@@ -54,7 +54,7 @@
#endif
/** Whether to use the approximation of Abramowitz/Stegun @cite abramowitz65a
- * @ref AS_erfc_part() for \f$\exp(d^2) \mathrm{erfc}(d)\f$,
+ * @ref Utils::AS_erfc_part() for \f$\exp(d^2) \mathrm{erfc}(d)\f$,
* or the C function std::erfc() in P3M and Ewald summation.
*/
#ifndef USE_ERFC_APPROXIMATION
diff --git a/src/config/myconfig-default.hpp b/src/config/myconfig-default.hpp
index 4e8c8df611a..f385f81ee3e 100644
--- a/src/config/myconfig-default.hpp
+++ b/src/config/myconfig-default.hpp
@@ -40,9 +40,6 @@
// Charges and dipoles
#define ELECTROSTATICS
-#ifdef CUDA
-#define MMM1D_GPU
-#endif
#define DIPOLES
// Active matter
diff --git a/src/core/BondList.hpp b/src/core/BondList.hpp
index e3b0eb00aa2..1c077cb3f7f 100644
--- a/src/core/BondList.hpp
+++ b/src/core/BondList.hpp
@@ -19,12 +19,10 @@
#ifndef ESPRESSO_BONDLIST_HPP
#define ESPRESSO_BONDLIST_HPP
-#include
#include
#include
#include
-#include
#include
#include
#include
@@ -33,6 +31,7 @@
#include
#include
#include
+#include <span>
#include
/**
@@ -45,18 +44,18 @@
class BondView {
/* Bond id */
int m_id = -1;
- Utils::Span<const int> m_partners;
+ std::span<const int> m_partners;
public:
BondView() = default;
- BondView(int id, Utils::Span<const int> partners)
+ BondView(int id, std::span<const int> partners)
: m_id(id), m_partners(partners) {}
int bond_id() const { return m_id; }
- Utils::Span<const int> const &partner_ids() const { return m_partners; }
+ auto const &partner_ids() const { return m_partners; }
bool operator==(BondView const &rhs) const {
- return m_id == rhs.m_id and boost::equal(m_partners, rhs.m_partners);
+ return m_id == rhs.m_id and std::ranges::equal(m_partners, rhs.m_partners);
}
bool operator!=(BondView const &rhs) const { return not(*this == rhs); }
@@ -108,13 +107,13 @@ class BondList {
template <class Archive> void serialize(Archive &ar, long int /* version */) {
if (Archive::is_loading::value) {
std::size_t size{};
- ar &size;
+ ar & size;
m_storage.resize(size);
}
if (Archive::is_saving::value) {
auto size = m_storage.size();
- ar &size;
+ ar & size;
}
ar &boost::serialization::make_array(m_storage.data(), m_storage.size());
@@ -140,8 +139,8 @@ class BondList {
auto const partners_begin = m_it;
auto const partners_end = id_pos;
auto const dist = std::distance(partners_begin, partners_end);
- return {-(*id_pos) - 1, Utils::make_span(std::addressof(*partners_begin),
- static_cast<std::size_t>(dist))};
+ return {-(*id_pos) - 1, std::span(std::addressof(*partners_begin),
+ static_cast<std::size_t>(dist))};
}
};
@@ -188,7 +187,7 @@ class BondList {
* @param bond Bond to add.
*/
void insert(BondView const &bond) {
- boost::copy(bond.partner_ids(), std::back_inserter(m_storage));
+ std::ranges::copy(bond.partner_ids(), std::back_inserter(m_storage));
assert(bond.bond_id() >= 0);
m_storage.push_back(-(bond.bond_id() + 1));
}
diff --git a/src/core/BoxGeometry.hpp b/src/core/BoxGeometry.hpp
index 6ba20e48e86..fdf3f6321a6 100644
--- a/src/core/BoxGeometry.hpp
+++ b/src/core/BoxGeometry.hpp
@@ -99,17 +99,17 @@ class BoxGeometry {
public:
BoxGeometry() {
set_length(Utils::Vector3d{1., 1., 1.});
- set_periodic(0, true);
- set_periodic(1, true);
- set_periodic(2, true);
+ set_periodic(0u, true);
+ set_periodic(1u, true);
+ set_periodic(2u, true);
set_type(BoxType::CUBOID);
}
BoxGeometry(BoxGeometry const &rhs) {
m_type = rhs.type();
set_length(rhs.length());
- set_periodic(0, rhs.periodic(0));
- set_periodic(1, rhs.periodic(1));
- set_periodic(2, rhs.periodic(2));
+ set_periodic(0u, rhs.periodic(0u));
+ set_periodic(1u, rhs.periodic(1u));
+ set_periodic(2u, rhs.periodic(2u));
m_lees_edwards_bc = rhs.m_lees_edwards_bc;
}
@@ -190,7 +190,7 @@ class BoxGeometry {
* i.e. a - b. Can be negative.
*/
template <typename T> T inline get_mi_coord(T a, T b, unsigned coord) const {
- assert(coord <= 2);
+ assert(coord <= 2u);
return detail::get_mi_coord(a, b, m_length[coord], m_length_inv[coord],
m_length_half[coord], m_periodic[coord]);
@@ -210,8 +210,7 @@ class BoxGeometry {
Utils::Vector<T, 3> get_mi_vector(const Utils::Vector<T, 3> &a,
const Utils::Vector<T, 3> &b) const {
if (type() == BoxType::LEES_EDWARDS) {
- auto const shear_plane_normal =
- static_cast(lees_edwards_bc().shear_plane_normal);
+ auto const shear_plane_normal = lees_edwards_bc().shear_plane_normal;
auto a_tmp = a;
auto b_tmp = b;
a_tmp[shear_plane_normal] = Algorithm::periodic_fold(
@@ -250,10 +249,8 @@ class BoxGeometry {
auto ret = u - v;
if (type() == BoxType::LEES_EDWARDS) {
auto const &le = m_lees_edwards_bc;
- auto const shear_plane_normal =
- static_cast(le.shear_plane_normal);
- auto const shear_direction =
- static_cast(le.shear_direction);
+ auto const shear_plane_normal = le.shear_plane_normal;
+ auto const shear_direction = le.shear_direction;
auto const dy = x[shear_plane_normal] - y[shear_plane_normal];
if (fabs(dy) > 0.5 * length_half()[shear_plane_normal]) {
ret[shear_direction] -= Utils::sgn(dy) * le.shear_velocity;
@@ -264,11 +261,11 @@ class BoxGeometry {
/** @brief Fold coordinates to primary simulation box in-place.
* Lees-Edwards offset is ignored.
- * @param[in,out] pos coordinate to fold
+ * @param[in,out] pos coordinates to fold
* @param[in,out] image_box image box offset
*/
void fold_position(Utils::Vector3d &pos, Utils::Vector3i &image_box) const {
- for (unsigned int i = 0u; i < 3u; i++) {
+ for (auto i = 0u; i < 3u; i++) {
if (m_periodic[i]) {
auto const result =
Algorithm::periodic_fold(pos[i], image_box[i], m_length[i]);
@@ -284,21 +281,39 @@ class BoxGeometry {
}
}
- /** @brief Calculate coordinates folded to primary simulation box.
- * @param p coordinate to fold
- * @return Folded coordinates.
+ /**
+ * @brief Calculate coordinates folded to primary simulation box.
+ * @param[in] pos coordinates to fold
+ * @return Folded coordinates.
*/
- auto folded_position(Utils::Vector3d const &p) const {
- Utils::Vector3d p_folded;
+ auto folded_position(Utils::Vector3d const &pos) const {
+ auto pos_folded = pos;
for (unsigned int i = 0u; i < 3u; i++) {
if (m_periodic[i]) {
- p_folded[i] = Algorithm::periodic_fold(p[i], m_length[i]);
- } else {
- p_folded[i] = p[i];
+ pos_folded[i] = Algorithm::periodic_fold(pos[i], m_length[i]);
+ }
+ }
+
+ return pos_folded;
+ }
+
+ /**
+ * @brief Calculate image box of coordinates folded to primary simulation box.
+ * @param[in] pos coordinates
+ * @param[in] image_box image box to fold
+ * @return Folded image box.
+ */
+ auto folded_image_box(Utils::Vector3d const &pos,
+ Utils::Vector3i const &image_box) const {
+ auto image_box_folded = image_box;
+ for (auto i = 0u; i < 3u; i++) {
+ if (m_periodic[i]) {
+ image_box_folded[i] =
+ Algorithm::periodic_fold(pos[i], image_box[i], m_length[i]).second;
}
}
- return p_folded;
+ return image_box_folded;
}
/** @brief Calculate image box shift vector */
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6f93be1162b..0e7b82024d1 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -58,10 +58,9 @@ if(ESPRESSO_BUILD_WITH_CUDA)
target_sources(espresso_core PRIVATE cuda/init.cpp)
espresso_add_gpu_library(
espresso_cuda SHARED cuda/common_cuda.cu cuda/init_cuda.cu
- cuda/CudaHostAllocator.cu magnetostatics/barnes_hut_gpu_cuda.cu
- magnetostatics/dipolar_direct_sum_gpu_cuda.cu
- electrostatics/mmm1d_gpu_cuda.cu electrostatics/p3m_gpu_cuda.cu
- electrostatics/p3m_gpu_error_cuda.cu system/GpuParticleData_cuda.cu)
+ cuda/CudaHostAllocator.cu magnetostatics/dipolar_direct_sum_gpu_cuda.cu
+ electrostatics/p3m_gpu_cuda.cu electrostatics/p3m_gpu_error_cuda.cu
+ system/GpuParticleData_cuda.cu)
add_library(espresso::cuda ALIAS espresso_cuda)
target_link_libraries(
espresso_cuda PRIVATE CUDA::cuda_driver CUDA::cudart CUDA::cufft
@@ -83,15 +82,21 @@ install(TARGETS espresso_core
target_link_libraries(
espresso_core PRIVATE espresso::config espresso::utils::mpi espresso::shapes
- espresso::profiler espresso::cpp_flags
+ espresso::cpp_flags
PUBLIC espresso::utils MPI::MPI_CXX Random123 espresso::particle_observables
- Boost::serialization Boost::mpi)
+ Boost::serialization Boost::mpi espresso::profiler)
target_include_directories(espresso_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
if(ESPRESSO_BUILD_WITH_WALBERLA)
- target_link_libraries(espresso_core PRIVATE espresso::walberla
- ${WALBERLA_LIBS})
+ target_link_libraries(
+ espresso_core
+ PRIVATE espresso::walberla
+ $<$<BOOL:${WALBERLA_BUILD_WITH_CUDA}>:espresso::walberla_cuda>)
+endif()
+
+if(ESPRESSO_BUILD_WITH_FFTW)
+ add_subdirectory(fft)
endif()
add_subdirectory(accumulators)
@@ -122,3 +127,10 @@ add_subdirectory(virtual_sites)
if(ESPRESSO_BUILD_TESTS)
add_subdirectory(unit_tests)
endif()
+
+if(ESPRESSO_BUILD_WITH_HDF5 AND ESPRESSO_BUILD_WITH_COVERAGE
+ AND NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set_source_files_properties(
+ io/writer/h5md_core.cpp PROPERTIES COMPILE_OPTIONS -felide-constructors
+ DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+endif()
diff --git a/src/core/CellParticleIterator.hpp b/src/core/CellParticleIterator.hpp
index b6a5ee9d26d..1853d828bc9 100644
--- a/src/core/CellParticleIterator.hpp
+++ b/src/core/CellParticleIterator.hpp
@@ -22,4 +22,4 @@
#include "ParticleIterator.hpp"
#include "cell_system/Cell.hpp"
-using CellParticleIterator = ParticleIterator<Cell **>;
+using CellParticleIterator = ParticleIterator<std::vector<Cell *>::iterator>;
diff --git a/src/core/MpiCallbacks.hpp b/src/core/MpiCallbacks.hpp
index 0174076a742..4b5f1a03aa6 100644
--- a/src/core/MpiCallbacks.hpp
+++ b/src/core/MpiCallbacks.hpp
@@ -43,7 +43,6 @@
#include
#include
#include
-#include <boost/range/algorithm/remove_if.hpp>
#include
#include
@@ -271,7 +270,7 @@ class MpiCallbacks {
if (m_comm.rank() == 0) {
try {
abort_loop();
- } catch (...) {
+ } catch (...) { // NOLINT(bugprone-empty-catch)
}
}
}
@@ -331,12 +330,9 @@ class MpiCallbacks {
* @param id Identifier of the callback to remove.
*/
void remove(int id) {
- m_callbacks.erase(
- boost::remove_if(m_callbacks,
- [ptr = m_callback_map[id]](auto const &e) {
- return e.get() == ptr;
- }),
- m_callbacks.end());
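+ // std::erase_if (C++20) replaces the erase-remove idiom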
+ std::erase_if(m_callbacks, [ptr = m_callback_map[id]](auto const &e) {
+ return e.get() == ptr;
+ });
m_callback_map.remove(id);
}
diff --git a/src/core/Observable_stat.cpp b/src/core/Observable_stat.cpp
index d50d367f368..f13704af9d1 100644
--- a/src/core/Observable_stat.cpp
+++ b/src/core/Observable_stat.cpp
@@ -25,7 +25,6 @@
#include "communication.hpp"
-#include <utils/Span.hpp>
#include
#include
@@ -33,6 +32,7 @@
#include
#include
#include
+#include <span>
#include
Observable_stat::Observable_stat(std::size_t chunk_size, std::size_t n_bonded,
@@ -46,9 +46,7 @@ Observable_stat::Observable_stat(std::size_t chunk_size, std::size_t n_bonded,
#else
constexpr std::size_t n_vs = 0;
#endif
- auto const n_non_bonded =
- static_cast<std::size_t>(Utils::lower_triangular(max_type, max_type)) +
- 1ul;
+ auto const n_non_bonded = get_non_bonded_offset(max_type, max_type) + 1ul;
constexpr std::size_t n_ext_fields = 1; // reduction over all fields
constexpr std::size_t n_kinetic = 1; // linear+angular kinetic contributions
@@ -57,26 +55,23 @@ Observable_stat::Observable_stat(std::size_t chunk_size, std::size_t n_bonded,
m_data = std::vector<double>(m_chunk_size * n_elements);
// spans for the different contributions
- kinetic = Utils::Span(m_data.data(), m_chunk_size);
- bonded = Utils::Span(kinetic.end(), n_bonded * m_chunk_size);
- coulomb = Utils::Span(bonded.end(), n_coulomb * m_chunk_size);
- dipolar = Utils::Span(coulomb.end(), n_dipolar * m_chunk_size);
- virtual_sites = Utils::Span(dipolar.end(), n_vs * m_chunk_size);
+ kinetic = std::span(m_data.data(), m_chunk_size);
+ bonded = std::span(kinetic.end(), n_bonded * m_chunk_size);
+ coulomb = std::span(bonded.end(), n_coulomb * m_chunk_size);
+ dipolar = std::span(coulomb.end(), n_dipolar * m_chunk_size);
+ virtual_sites = std::span(dipolar.end(), n_vs * m_chunk_size);
external_fields =
- Utils::Span(virtual_sites.end(), n_ext_fields * m_chunk_size);
+ std::span(virtual_sites.end(), n_ext_fields * m_chunk_size);
non_bonded_intra =
- Utils::Span(external_fields.end(), n_non_bonded * m_chunk_size);
+ std::span(external_fields.end(), n_non_bonded * m_chunk_size);
non_bonded_inter =
- Utils::Span(non_bonded_intra.end(), n_non_bonded * m_chunk_size);
- assert(non_bonded_inter.end() == (m_data.data() + m_data.size()));
+ std::span(non_bonded_intra.end(), n_non_bonded * m_chunk_size);
+ assert(&*non_bonded_inter.end() == (m_data.data() + m_data.size()));
}
-Utils::Span<double>
-Observable_stat::get_non_bonded_contribution(Utils::Span<double> base_pointer,
- int type1, int type2) const {
- auto const offset = static_cast(
+std::size_t Observable_stat::get_non_bonded_offset(int type1, int type2) const {
+ return static_cast<std::size_t>(
Utils::lower_triangular(std::max(type1, type2), std::min(type1, type2)));
- return {base_pointer.begin() + offset * m_chunk_size, m_chunk_size};
}
void Observable_stat::mpi_reduce() {
diff --git a/src/core/Observable_stat.hpp b/src/core/Observable_stat.hpp
index a5e40db0d56..5a04649a63b 100644
--- a/src/core/Observable_stat.hpp
+++ b/src/core/Observable_stat.hpp
@@ -19,15 +19,12 @@
#pragma once
-#include <boost/range/algorithm/transform.hpp>
-#include <boost/range/numeric.hpp>
-
-#include <utils/Span.hpp>
-
#include
#include
#include
#include
+#include <numeric>
+#include <span>
#include
/** Observable for the pressure and energy. */
@@ -37,10 +34,14 @@ class Observable_stat {
/** Number of doubles per data item */
std::size_t m_chunk_size;
+ std::size_t get_non_bonded_offset(int type1, int type2) const;
+
/** Get contribution from a non-bonded interaction */
- Utils::Span<double>
- get_non_bonded_contribution(Utils::Span<double> base_pointer, int type1,
- int type2) const;
+ auto get_non_bonded_contribution(std::span<double> view, int type1,
+ int type2) const {
+ auto const offset = get_non_bonded_offset(type1, type2);
+ return view.subspan(offset * m_chunk_size, m_chunk_size);
+ }
public:
Observable_stat(std::size_t chunk_size, std::size_t n_bonded, int max_type);
@@ -52,10 +53,10 @@ class Observable_stat {
* @param column Which column to sum up (only relevant for multi-dimensional
* observables).
*/
- double accumulate(double acc = 0.0, std::size_t column = 0) const {
+ double accumulate(double acc = 0.0, std::size_t column = 0ul) const {
assert(column < m_chunk_size);
- if (m_chunk_size == 1)
- return boost::accumulate(m_data, acc);
+ if (m_chunk_size == 1ul)
+ return std::accumulate(m_data.begin(), m_data.end(), acc);
for (auto it = m_data.begin() + static_cast<std::ptrdiff_t>(column);
it < m_data.end(); it += static_cast<std::ptrdiff_t>(m_chunk_size))
@@ -65,40 +66,40 @@ class Observable_stat {
/** Rescale values */
void rescale(double volume) {
- auto const fac = 1. / volume;
- boost::transform(m_data, m_data.begin(), [fac](auto e) { return e * fac; });
+ std::ranges::transform(m_data, m_data.begin(),
+ std::bind_front(std::multiplies{}, 1. / volume));
}
/** Contribution from linear and angular kinetic energy (accumulated). */
- Utils::Span<double> kinetic;
+ std::span<double> kinetic;
/** Contribution(s) from bonded interactions. */
- Utils::Span<double> bonded;
+ std::span<double> bonded;
/** Contribution(s) from Coulomb interactions. */
- Utils::Span<double> coulomb;
+ std::span<double> coulomb;
/** Contribution(s) from dipolar interactions. */
- Utils::Span<double> dipolar;
+ std::span<double> dipolar;
/** Contribution from virtual sites (accumulated). */
- Utils::Span<double> virtual_sites;
+ std::span<double> virtual_sites;
/** Contribution from external fields (accumulated). */
- Utils::Span<double> external_fields;
+ std::span<double> external_fields;
/** Contribution(s) from non-bonded intramolecular interactions. */
- Utils::Span<double> non_bonded_intra;
+ std::span<double> non_bonded_intra;
/** Contribution(s) from non-bonded intermolecular interactions. */
- Utils::Span<double> non_bonded_inter;
+ std::span<double> non_bonded_inter;
/** Get contribution from a bonded interaction */
- Utils::Span<double> bonded_contribution(int bond_id) const {
+ std::span<double> bonded_contribution(int bond_id) const {
auto const offset = m_chunk_size * static_cast<std::size_t>(bond_id);
return {bonded.data() + offset, m_chunk_size};
}
void add_non_bonded_contribution(int type1, int type2, int molid1, int molid2,
- Utils::Span<const double> data) {
+ std::span<const double> data) {
assert(data.size() == m_chunk_size);
- auto const span = (molid1 == molid2) ? non_bonded_intra : non_bonded_inter;
- auto const dest = get_non_bonded_contribution(span, type1, type2);
+ auto const view = (molid1 == molid2) ? non_bonded_intra : non_bonded_inter;
+ auto const dest = get_non_bonded_contribution(view, type1, type2);
- boost::transform(dest, data, dest.begin(), std::plus<>{});
+ std::ranges::transform(dest, data, dest.begin(), std::plus{});
}
void add_non_bonded_contribution(int type1, int type2, int molid1, int molid2,
@@ -107,14 +108,12 @@ class Observable_stat {
}
/** Get contribution from a non-bonded intramolecular interaction */
- Utils::Span<double> non_bonded_intra_contribution(int type1,
- int type2) const {
+ auto non_bonded_intra_contribution(int type1, int type2) const {
return get_non_bonded_contribution(non_bonded_intra, type1, type2);
}
/** Get contribution from a non-bonded intermolecular interaction */
- Utils::Span<double> non_bonded_inter_contribution(int type1,
- int type2) const {
+ auto non_bonded_inter_contribution(int type1, int type2) const {
return get_non_bonded_contribution(non_bonded_inter, type1, type2);
}
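
The Observable_stat hunks above replace Utils::Span with std::span and inline the chunk lookup via subspan(). A minimal sketch of that slicing pattern, with all names and sizes made up for illustration (this is not ESPResSo code):

    #include <cassert>
    #include <cstddef>
    #include <span>
    #include <vector>

    int main() {
      // one 9-component chunk (e.g. a 3x3 tensor) per interaction pair
      std::size_t const chunk_size = 9u;
      std::vector<double> buffer(4u * chunk_size); // 4 pairs, flat storage
      std::span<double> view{buffer};
      // slice out the chunk of pair #2, as get_non_bonded_contribution() does
      auto const offset = std::size_t{2u};
      auto chunk = view.subspan(offset * chunk_size, chunk_size);
      assert(chunk.size() == chunk_size);
      chunk[0] += 1.; // writes through to the underlying vector
      assert(buffer[offset * chunk_size] == 1.);
    }

Since std::span is a non-owning view, the sliced chunk aliases the flat buffer, which is what lets add_non_bonded_contribution() accumulate in place.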
diff --git a/src/core/PartCfg.cpp b/src/core/PartCfg.cpp
index 21c330d3ce3..222aff96a9c 100644
--- a/src/core/PartCfg.cpp
+++ b/src/core/PartCfg.cpp
@@ -23,10 +23,9 @@
#include "particle_node.hpp"
#include "system/System.hpp"
-#include <utils/Span.hpp>
-
#include <algorithm>
#include <cstddef>
+#include <span>
void PartCfg::update() {
m_parts.clear();
@@ -37,8 +36,7 @@ void PartCfg::update() {
for (std::size_t offset = 0; offset < ids.size();) {
auto const this_size = std::clamp(chunk_size, std::size_t{0},
std::size_t{ids.size() - offset});
- auto const chunk_ids =
- Utils::make_const_span(ids.data() + offset, this_size);
+ auto const chunk_ids = std::span(ids.data() + offset, this_size);
prefetch_particle_data(chunk_ids);
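
PartCfg::update() above now builds each chunk as a std::span directly over the id buffer. A minimal sketch of that chunked traversal, where prefetch() is a hypothetical stand-in for prefetch_particle_data() (not ESPResSo code):

    #include <algorithm>
    #include <cstddef>
    #include <span>
    #include <vector>

    // hypothetical stand-in for prefetch_particle_data()
    static void prefetch(std::span<const int> /* ids */) {}

    int main() {
      std::vector<int> ids(100, 0);
      std::size_t const chunk_size = 32u;
      for (std::size_t offset = 0u; offset < ids.size();) {
        // the last chunk may be shorter than chunk_size
        auto const this_size = std::min(chunk_size, ids.size() - offset);
        auto const chunk_ids = std::span(ids.data() + offset, this_size);
        prefetch(chunk_ids);
        offset += this_size;
      }
    }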
diff --git a/src/core/Particle.hpp b/src/core/Particle.hpp
index 46b9085e043..2e125ae3abb 100644
--- a/src/core/Particle.hpp
+++ b/src/core/Particle.hpp
@@ -35,12 +35,11 @@
#include
#include
+#include
#include
#include
namespace detail {
-inline void check_axis_idx_valid(unsigned int const axis) { assert(axis <= 2); }
-
inline bool get_nth_bit(uint8_t const bitfield, unsigned int const bit_idx) {
return bitfield & (1u << bit_idx);
}
@@ -57,7 +56,7 @@ struct ParticleParametersSwimming {
bool is_engine_force_on_fluid = false;
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &f_swim &swimming &is_engine_force_on_fluid;
+ ar & f_swim & swimming & is_engine_force_on_fluid;
}
};
#endif
@@ -160,10 +159,10 @@ struct ParticleProperties {
Utils::Quaternion<double> quat = Utils::Quaternion<double>::identity();
template <class Archive> void serialize(Archive &ar, long int) {
- ar &to_particle_id;
- ar &distance;
- ar &rel_orientation;
- ar &quat;
+ ar & to_particle_id;
+ ar & distance;
+ ar & rel_orientation;
+ ar & quat;
}
} vs_relative;
#endif // VIRTUAL_SITES_RELATIVE
@@ -199,53 +198,53 @@ struct ParticleProperties {
#endif
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &identity;
- ar &mol_id;
- ar &type;
- ar &propagation;
+ ar & identity;
+ ar & mol_id;
+ ar & type;
+ ar & propagation;
#ifdef MASS
- ar &mass;
+ ar & mass;
#endif
#ifdef ROTATIONAL_INERTIA
- ar &rinertia;
+ ar & rinertia;
#endif
#ifdef ROTATION
- ar &rotation;
+ ar & rotation;
#endif
#ifdef ELECTROSTATICS
- ar &q;
+ ar & q;
#endif
#ifdef LB_ELECTROHYDRODYNAMICS
- ar &mu_E;
+ ar & mu_E;
#endif
#ifdef DIPOLES
- ar &dipm;
+ ar & dipm;
#endif
#ifdef DIPOLE_FIELD_TRACKING
- ar &dip_fld;
+ ar & dip_fld;
#endif
#ifdef VIRTUAL_SITES_RELATIVE
- ar &vs_relative;
+ ar & vs_relative;
#endif
#ifdef THERMOSTAT_PER_PARTICLE
- ar &gamma;
+ ar & gamma;
#ifdef ROTATION
- ar &gamma_rot;
+ ar & gamma_rot;
#endif
#endif // THERMOSTAT_PER_PARTICLE
#ifdef EXTERNAL_FORCES
- ar &ext_flag;
- ar &ext_force;
+ ar & ext_flag;
+ ar & ext_force;
#ifdef ROTATION
- ar &ext_torque;
+ ar & ext_torque;
#endif
#endif // EXTERNAL_FORCES
#ifdef ENGINE
- ar &swim;
+ ar & swim;
#endif
}
};
@@ -274,13 +273,13 @@ struct ParticlePosition {
#endif
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &p;
- ar &i;
+ ar & p;
+ ar & i;
#ifdef ROTATION
- ar &quat;
+ ar & quat;
#endif
#ifdef BOND_CONSTRAINT
- ar &p_last_timestep;
+ ar & p_last_timestep;
#endif
}
};
@@ -300,15 +299,17 @@ struct ParticleForce {
friend ParticleForce operator+(ParticleForce const &lhs,
ParticleForce const &rhs) {
-#ifdef ROTATION
- return {lhs.f + rhs.f, lhs.torque + rhs.torque};
-#else
- return lhs.f + rhs.f;
-#endif
+ ParticleForce result = lhs;
+ result += rhs;
+ return result;
}
ParticleForce &operator+=(ParticleForce const &rhs) {
- return *this = *this + rhs;
+ f += rhs.f;
+#ifdef ROTATION
+ torque += rhs.torque;
+#endif
+ return *this;
}
/** force. */
@@ -320,9 +321,9 @@ struct ParticleForce {
#endif
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &f;
+ ar & f;
#ifdef ROTATION
- ar &torque;
+ ar & torque;
#endif
}
};
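
The ParticleForce hunk above inverts the usual dependency: operator+ is now a thin wrapper over operator+=, so the ROTATION-only torque member is summed in a single place. A self-contained sketch of the copy-and-delegate idiom it follows (illustrative only, not ESPResSo code):

    #include <cassert>

    struct Force {
      double f = 0.;
      // the compound operator does the real work once
      Force &operator+=(Force const &rhs) {
        f += rhs.f;
        return *this;
      }
      // the binary operator copies, then delegates
      friend Force operator+(Force const &lhs, Force const &rhs) {
        Force result = lhs;
        result += rhs;
        return result;
      }
    };

    int main() {
      Force a{1.}, b{2.};
      assert((a + b).f == 3.);
      a += b;
      assert(a.f == 3.);
    }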
@@ -343,9 +344,9 @@ struct ParticleMomentum {
#endif
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &v;
+ ar & v;
#ifdef ROTATION
- ar &omega;
+ ar & omega;
#endif
}
};
@@ -363,10 +364,10 @@ struct ParticleLocal {
double lees_edwards_offset = 0.;
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &ghost;
- ar &lees_edwards_flag;
- ar &p_old;
- ar &lees_edwards_offset;
+ ar & ghost;
+ ar & lees_edwards_flag;
+ ar & p_old;
+ ar & lees_edwards_offset;
}
};
@@ -385,7 +386,7 @@ struct ParticleRattle {
}
template <class Archive> void serialize(Archive &ar, long int /* version */) {
- ar &correction;
+ ar & correction;
}
};
#endif
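
The remaining Particle.hpp hunks are whitespace-only: the serialization operator is now spelled `ar & member;`. A minimal round-trip sketch of the intrusive Boost.Serialization pattern these structs use (Payload and its members are made up for illustration):

    #include <boost/archive/text_iarchive.hpp>
    #include <boost/archive/text_oarchive.hpp>
    #include <sstream>

    struct Payload {
      int id = 0;
      double value = 0.;
      // one member function handles both saving and loading
      template <class Archive>
      void serialize(Archive &ar, unsigned int /* version */) {
        ar & id;
        ar & value;
      }
    };

    int main() {
      std::stringstream stream;
      {
        boost::archive::text_oarchive oa{stream};
        Payload out{42, 3.14};
        oa << out; // serialize to text
      }
      boost::archive::text_iarchive ia{stream};
      Payload in;
      ia >> in; // restores id == 42, value == 3.14
    }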
@@ -458,11 +459,11 @@ struct Particle { // NOLINT(bugprone-exception-escape)
auto &rotation() { return p.rotation; }
bool can_rotate() const { return static_cast<bool>(p.rotation); }