From a120d7d420b0a1cd628772a8ec66c37c0a55b57f Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:57:54 -0400 Subject: [PATCH 01/11] Skeleton draft --- paper/paper.bib | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ paper/paper.md | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 paper/paper.bib create mode 100644 paper/paper.md diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..2b527be --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,86 @@ +@article{copa:2021, + author = {Susan M Mniszewski and James Belak and Jean-Luc Fattebert and Christian FA Negre and Stuart R Slattery and Adetokunbo A Adedoyin and Robert F Bird and Choongseok Chang and Guangye Chen and Stéphane Ethier and Shane Fogerty and Salman Habib and Christoph Junghans and Damien Lebrun-Grandié and Jamaludin Mohd-Yusof and Stan G Moore and Daniel Osei-Kuffuor and Steven J Plimpton and Adrian Pope and Samuel Temple Reeve and Lee Ricketson and Aaron Scheinberg and Amil Y Sharma and Michael E Wall}, + title ={Enabling particle applications for exascale computing platforms}, + journal = {The International Journal of High Performance Computing Applications}, + volume = {0}, + number = {0}, + pages = {10943420211022829}, + year = {2021}, + doi = {10.1177/10943420211022829} +} + +@article{Slattery2022, + doi = {10.21105/joss.04115}, + url = {https://doi.org/10.21105/joss.04115}, + year = {2022}, + publisher = {The Open Journal}, + volume = {7}, + number = {72}, + pages = {4115}, + author = {Stuart Slattery and Samuel Temple Reeve and Christoph Junghans and Damien Lebrun-Grandié and Robert Bird and Guangye Chen and Shane Fogerty and Yuxing Qiu and Stephan Schulz and Aaron Scheinberg and Austin Isner and Kwitae Chong and Stan Moore and Timothy Germann and James Belak and Susan Mniszewski}, + title = {Cabana: A Performance Portable Library for Particle-Based Simulations}, + journal = {Journal of Open Source Software} } + +@article{kokkos:2022, + title = {Kokkos 3: {Programming} {Model} {Extensions} for the {Exascale} {Era}}, + volume = {33}, + issn = {1558-2183}, + shorttitle = {Kokkos 3}, + doi = {10.1109/TPDS.2021.3097283}, + number = {4}, + journal = {IEEE Transactions on Parallel and Distributed Systems}, + author = {Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah}, + month = apr, + year = {2022}, + note = {Conference Name: IEEE Transactions on Parallel and Distributed Systems}, + keywords = {Benchmark testing, exascale, Graphics processing units, Hardware, heterogeneous computing, high-performance computing, Kernel, Laboratories, Layout, Performance portability, Programming, programming models}, + pages = {805--817}, +} + +@book{hockney, + address = {Bristol England ; Philadelphia}, + edition = {1st edition}, + title = {Computer {Simulation} {Using} {Particles}}, + language = {English}, + publisher = {CRC Press}, + author = {Hockney, R. W. and Eastwood, J. W.}, + month = jan, + year = {1989}, + isbn = {978-0-85274-392-8} +} + +@article{ecp:2020, + title = {Exascale applications: skin in the game}, + volume = {378}, + shorttitle = {Exascale applications}, + number = {2166}, + journal = {Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences}, + author = {Alexander, Francis and Almgren, Ann and Bell, John and Bhattacharjee, Amitava and Chen, Jacqueline and Colella, Phil and Daniel, David and DeSlippe, Jack and Diachin, Lori and Draeger, Erik and Dubey, Anshu and Dunning, Thom and Evans, Thomas and Foster, Ian and Francois, Marianne and Germann, Tim and Gordon, Mark and Habib, Salman and Halappanavar, Mahantesh and Hamilton, Steven and Hart, William and (Henry) Huang, Zhenyu and Hungerford, Aimee and Kasen, Daniel and Kent, Paul R. C. and Kolev, Tzanio and Kothe, Douglas B. and Kronfeld, Andreas and Luo, Ye and Mackenzie, Paul and McCallen, David and Messer, Bronson and Mniszewski, Sue and Oehmen, Chris and Perazzo, Amedeo and Perez, Danny and Richards, David and Rider, William J. and Rieben, Rob and Roche, Kenneth and Siegel, Andrew and Sprague, Michael and Steefel, Carl and Stevens, Rick and Syamlal, Madhava and Taylor, Mark and Turner, John and Vay, Jean-Luc and Voter, Artur F. and Windus, Theresa L. and Yelick, Katherine}, + month = mar, + year = {2020}, + pages = {20190056}, + doi = {10.1098/rsta.2019.0056} +} + +@article{arborx:2020, + title={ArborX: A Performance Portable Geometric Search Library}, + author={Lebrun-Grandié, Damien and Prokopenko, Andrey and Turcksin, Bruno and Slattery, Stuart R}, + journal={ACM Transactions on Mathematical Software (TOMS)}, + volume={47}, + number={1}, + pages={1--15}, + year={2020}, + publisher={ACM New York, NY, USA}, + doi={10.1145/3412558} +} + +@article{openfpm, + title={OpenFPM: A scalable open framework for particle and particle-mesh codes on parallel computers}, + author={Incardona, Pietro and Leo, Antonio and Zaluzhnyi, Yaroslav and Ramaswamy, Rajesh and Sbalzarini, Ivo F}, + journal={Computer Physics Communications}, + volume={241}, + pages={155--177}, + year={2019}, + publisher={Elsevier}, + doi={10.1016/j.cpc.2019.03.007} +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..d886c6c --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,73 @@ +--- +title: 'Picasso: Performance Portable Particle-in-Cell' +tags: + - C++ + - Kokkos + - Cabana + - particle-in-cell + - material point method +authors: + - name: Stuart Slattery^[corresponding author] + orcid: 0000-0003-0103-888X + affiliation: 1 + - name: Samuel Temple Reeve + orcid: 0000-0002-4250-9476 + affiliation: 1 + - name: Austin Isner + affiliation: 1 + - name: Kwitae Chong + affiliation: 1 + - name: Lance Bullerwell + affiliation: 1 +affiliations: + - name: Oak Ridge National Laboratory, Oak Ridge, TN, USA + index: 1 +date: 12 April 2024 +bibliography: paper.bib +--- + +# Summary + +Particle-in-cell (PIC) methods are used is disparate fields, from plasma +physics to solid mechanics and computer graphics. +`Picasso` is a performance portable library for PIC simulations, developed +throughout the Exascale Computing Project (ECP) [@ecp:2020]. `Picasso` builds +on the `Cabana` library [@cabana:2022] for particle and structured grid +methods, which in turn extends the `Kokkos` library for on-node parallelism +across hardware architectures [@kokkos:2022] and `MPI` for scalable multi-node +communication. `Picasso` provides interpolation schemes for particle-to-grid +and grid-to-particle updates, embedded free surface tracking, and robust +linear algebra support for particle and grid operations. + +# Statement of need + + +# Library capabilities + +## Interpolation + + +## Free surfaces + + +## Examples and performance testing + + +# Acknowledgments + +This work was supported by the Exascale Computing Project (17-SC-20-SC), a +collaborative effort of the U.S. DOE Office of Science and the NNSA. + +This manuscript has been authored by UT-Battelle, LLC under Contract No. +DE-AC05-00OR22725 with the U.S. Department of Energy (DOE). The publisher, by +accepting the article for publication, acknowledges that the United States +Government retains a non-exclusive, paid-up, irrevocable, world-wide license to +publish or reproduce the published form of this manuscript, or allow others to +do so, for United States Government purposes. The DOE will provide public +access to these results of federally sponsored research in accordance with the +DOE Public Access Plan. + +This research used resources of the Oak Ridge Leadership Computing Facility +(OLCF), supported by DOE under contract DE-AC05-00OR22725. + +# References From 94de8e1efe34c7babe28edeb328b39054ca03740 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:49:00 -0400 Subject: [PATCH 02/11] WIPx --- paper/paper.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index d886c6c..7e69f87 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -37,10 +37,22 @@ methods, which in turn extends the `Kokkos` library for on-node parallelism across hardware architectures [@kokkos:2022] and `MPI` for scalable multi-node communication. `Picasso` provides interpolation schemes for particle-to-grid and grid-to-particle updates, embedded free surface tracking, and robust -linear algebra support for particle and grid operations. +linear algebra support for particle and grid operations. This separation of +concerns results in a parallelism layer, a simulation motif (and distributed +communication) layer, and finally a PIC algorithms layer, all enabling the +user-level physics. # Statement of need +Computational predictions are increasingly necessary for answering scientific +and engineering questions. As these needs continue to expand, striking the +proper balance of computational performance, portability across hardware +architectures, and programmer productivity requires substantial effort. +While `Kokkos` provides general portability for parallel algorithms, many +particle-specific extensions are necessary in `Cabana` to improve the +productivity, both in capability and in achieving peak performance. +Even still, there are no known current libraries to support PIC algorithms +and related complex particle operations. # Library capabilities From 08202166c302435dd92fd9c77bf9ef6e69866d9c Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:39:36 -0400 Subject: [PATCH 03/11] add portable pic refs --- paper/paper.bib | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/paper/paper.bib b/paper/paper.bib index 2b527be..9e00d50 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -84,3 +84,39 @@ @article{openfpm publisher={Elsevier}, doi={10.1016/j.cpc.2019.03.007} } + +@inproceedings{ippl, + title={Scaling and performance portability of the particle-in-cell scheme for plasma physics applications + through mini-apps targeting exascale architectures}, + author={Muralikrishnan, Sriramkrishnan and Frey, Matthias and Vinciguerra, Alessandro and Ligotino, Michael + and Cerfon, Antoine J and Stoyanov, Miroslav and Gayatri, Rahulkumar and Adelmann, Andreas}, + booktitle={Proceedings of the 2024 SIAM Conference on Parallel Processing for Scientific Computing (PP)}, + pages={26--38}, + year={2024}, + organization={SIAM} +} + +@ARTICLE{vpic, + author={Bird, Robert and Tan, Nigel and Luedtke, Scott V. and Harrell, Stephen Lien and Taufer, Michela and Albright, Brian}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + title={VPIC 2.0: Next Generation Particle-in-Cell Simulations}, + year={2022}, + volume={33}, + number={4}, + pages={952-963}, + keywords={Plasmas;Hardware;Physics;Libraries;Mathematical model;Shape;Layout;Simulation;portability;plasma physics;particle-in-cell}, + doi={10.1109/TPDS.2021.3084795} +} + +@article{pumipic, +title = {PUMIPic: A mesh-based approach to unstructured mesh Particle-In-Cell on GPUs}, +journal = {Journal of Parallel and Distributed Computing}, +volume = {157}, +pages = {1-12}, +year = {2021}, +issn = {0743-7315}, +doi = {https://doi.org/10.1016/j.jpdc.2021.06.004}, +url = {https://www.sciencedirect.com/science/article/pii/S0743731521001337}, +author = {Gerrett Diamond and Cameron W. Smith and Chonglin Zhang and Eisung Yoon and Mark S. Shephard}, +keywords = {Particle-in-cell, Unstructured mesh, GPU}, +} \ No newline at end of file From b29b7f69bf372f77aada7f4e23d7278880547840 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:36:48 -0400 Subject: [PATCH 04/11] Add new pic method papers --- paper/paper.bib | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/paper/paper.bib b/paper/paper.bib index 9e00d50..12ecbc5 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -119,4 +119,24 @@ @article{pumipic url = {https://www.sciencedirect.com/science/article/pii/S0743731521001337}, author = {Gerrett Diamond and Cameron W. Smith and Chonglin Zhang and Eisung Yoon and Mark S. Shephard}, keywords = {Particle-in-cell, Unstructured mesh, GPU}, -} \ No newline at end of file +} + +@article{powerpic, + title={The power particle-in-cell method}, + author={Qu, Ziyin and Li, Minchen and De Goes, Fernando and Jiang, Chenfanfu}, + journal={ACM Transactions on Graphics}, + volume={41}, + number={4}, + year={2022} +} + +@inproceedings{ipic, + title={The Impulse Particle-In-Cell Method}, + author={Sancho, Sergio and Tang, Jingwei and Batty, Christopher and Azevedo, Vinicius C}, + booktitle={Computer Graphics Forum}, + volume={43}, + number={2}, + pages={e15022}, + year={2024}, + organization={Wiley Online Library} +} From da39f188064a235f0ba26f82d7a8d516658f150f Mon Sep 17 00:00:00 2001 From: Austin Isner Date: Mon, 19 Aug 2024 14:44:28 -0400 Subject: [PATCH 05/11] Add Library Capabilities section --- paper/paper.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 7e69f87..31cac95 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -29,7 +29,7 @@ bibliography: paper.bib # Summary Particle-in-cell (PIC) methods are used is disparate fields, from plasma -physics to solid mechanics and computer graphics. +physics to solid mechanics and computer graphics. `Picasso` is a performance portable library for PIC simulations, developed throughout the Exascale Computing Project (ECP) [@ecp:2020]. `Picasso` builds on the `Cabana` library [@cabana:2022] for particle and structured grid @@ -55,6 +55,28 @@ Even still, there are no known current libraries to support PIC algorithms and related complex particle operations. # Library capabilities +`Picasso` introduces various classes and supporting types for enabling a high-level specification of field data and PIC algorithms, with the intent to hide implementation details such as memory spaces and allocation, halo communication patterns, and device versus host access. The design principles of performance transparency and PIC domain abstraction thus allows the algorithm designer/user implementor to focus only on the field data definitions and their interdependencies within various sequential "operations", allowing for a natural identification. `Picasso` defines two necessary types for facilitating data management and communication: the `FieldManager` and `GridOperator`'s, discussed in the following sections. + +At the core of `Picasso`'s domain model is the concept of a `FieldLayout`, which contains a location and a field tag. A location can be one of any structured mesh entity (`Node`, `Cell`, `Face`, `Edge`), where `D` is a specific dimension `I`,`J`, or `K`, or a `Particle`. A tag is any type with a string-like `label()` static member function. The complete specification of a `FieldLayout` uniquely defines a field as known to the `FieldManager`, while also allowing the same field tag (e.g. `Temperature`) to be associated with multiple locations (e.g. both `Node` and `Particle`). +### Field Manager +The `Picasso::FieldManager` is a single collection wrapper type for storing unique handles to field layouts in an `unordered_map`. The `FieldManager` itself is separately initialized by passing it to each `GridOperator::setup()` function. + +### Grid Operators +The `Picasso::GridOperator` encapsulates all of the field management work associated with parallel grid operations. Both particle-centric loops are supported (for operations like P2G and G2P) as well as entity centric loops for more traditional discrete grid operations. +A `GridOperator` is defined over a given mesh type and a user defines the fields which must be gathered on the grid to apply the operator and the fields which will be scattered on the grid after the operation is complete. All distributed parallel work is managed by these dependencies - the user needs to only write the body of the operator kernel to be applied at each data point (e.g. each particle or mesh entity). +The order of the dependency templates (if there are any dependencies) is defined above in the documentation for `GridOperatorDependencies`. + +### Particle Initialization +`Picasso` provides two modes for initializing particles on a background grid: `InitUniform` and `InitRandom`. In addition to requiring parameters (particles per dimension for `InitUniform` or particles per cell for `InitRandom`), a user-defined predicate functor must also be provided that returns true or false based on certain criteria of the candidate particle, such as position. The design for a custom user-hook during the creation phase allows for flexible and arbitrary particle initialization. + +### Batched Linear Algebra +Matrix-vector and matrix-matrix operations are nearly ubiquitous in particle-to-grid and grid-to-particle mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations disencumbers the algorithm author from considering additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: +```cpp + auto C = 0.5 * (A + ~A) * B + (x * ~x); +``` +where the returned `C` is an NxN matrix, `~` is the transpose operator, and the `*` is an outer-product operation in the last term. + +`Picasso` provides an interface for various supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Field tags need only derive from these types in order to make use of `Picasso` linear algebra features. Although all basic operations on vectors and matrices are implemented, several specialized operations are also available, including matrix determinant, inverse, exponential, LU, and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation. ## Interpolation From a4846c90a83a97dfdcde7a98601f105514b433a6 Mon Sep 17 00:00:00 2001 From: Austin Isner Date: Mon, 9 Sep 2024 10:15:16 -0400 Subject: [PATCH 06/11] Fill-in section on level sets and free surfaces. - Added parallel Hopf-Lax reference --- paper/paper.bib | 21 +++++++++++++++++---- paper/paper.md | 6 +++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index 12ecbc5..4a9340a 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -9,7 +9,7 @@ @article{copa:2021 doi = {10.1177/10943420211022829} } -@article{Slattery2022, +@article{Slattery2022, doi = {10.21105/joss.04115}, url = {https://doi.org/10.21105/joss.04115}, year = {2022}, @@ -19,7 +19,7 @@ @article{Slattery2022 pages = {4115}, author = {Stuart Slattery and Samuel Temple Reeve and Christoph Junghans and Damien Lebrun-Grandié and Robert Bird and Guangye Chen and Shane Fogerty and Yuxing Qiu and Stephan Schulz and Aaron Scheinberg and Austin Isner and Kwitae Chong and Stan Moore and Timothy Germann and James Belak and Susan Mniszewski}, title = {Cabana: A Performance Portable Library for Particle-Based Simulations}, - journal = {Journal of Open Source Software} } + journal = {Journal of Open Source Software} } @article{kokkos:2022, title = {Kokkos 3: {Programming} {Model} {Extensions} for the {Exascale} {Era}}, @@ -98,8 +98,8 @@ @inproceedings{ippl @ARTICLE{vpic, author={Bird, Robert and Tan, Nigel and Luedtke, Scott V. and Harrell, Stephen Lien and Taufer, Michela and Albright, Brian}, - journal={IEEE Transactions on Parallel and Distributed Systems}, - title={VPIC 2.0: Next Generation Particle-in-Cell Simulations}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + title={VPIC 2.0: Next Generation Particle-in-Cell Simulations}, year={2022}, volume={33}, number={4}, @@ -140,3 +140,16 @@ @inproceedings{ipic year={2024}, organization={Wiley Online Library} } + +@article{Royston:2018, + author = {Michael Royston and Andre Pradhana and Byungjoon Lee and Yat Tin Chow and Wotao Yin and Joseph Teran and Stanley Osher}, + doi = {https://doi.org/10.1016/j.jcp.2018.01.035}, + issn = {0021-9991}, + journal = {Journal of Computational Physics}, + keywords = {Level set methods, Eikonal equation, Hamilton Jacobi, Hopf--Lax}, + pages = {7-17}, + title = {Parallel redistancing using the {Hopf--Lax} formula}, + url = {https://www.sciencedirect.com/science/article/pii/S0021999118300457}, + volume = {365}, + year = {2018} +} diff --git a/paper/paper.md b/paper/paper.md index 31cac95..e5d4a04 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -70,7 +70,7 @@ The order of the dependency templates (if there are any dependencies) is defined `Picasso` provides two modes for initializing particles on a background grid: `InitUniform` and `InitRandom`. In addition to requiring parameters (particles per dimension for `InitUniform` or particles per cell for `InitRandom`), a user-defined predicate functor must also be provided that returns true or false based on certain criteria of the candidate particle, such as position. The design for a custom user-hook during the creation phase allows for flexible and arbitrary particle initialization. ### Batched Linear Algebra -Matrix-vector and matrix-matrix operations are nearly ubiquitous in particle-to-grid and grid-to-particle mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations disencumbers the algorithm author from considering additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: +Matrix-vector and matrix-matrix operations are nearly ubiquitous in particle-to-grid and grid-to-particle mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations alleviates the consideration of additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: ```cpp auto C = 0.5 * (A + ~A) * B + (x * ~x); ``` @@ -81,8 +81,8 @@ where the returned `C` is an NxN matrix, `~` is the transpose operator, and the ## Interpolation -## Free surfaces - +## Level Sets and Boundaries +The `Picasso::LevelSet` and `Picasso::ParticleLevelSet` are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from [ArborX::BVH](https://github.com/arborx/ArborX) (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `Picasso::MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be serialized to disk in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. ## Examples and performance testing From 38f1aaa34bdbf41def4d4054415f2e6cfc8968f9 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:40:42 -0400 Subject: [PATCH 07/11] Updates for need and future work --- paper/paper.bib | 34 ++++++++++++++++++++++++++++++++++ paper/paper.md | 35 +++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index 4a9340a..911f956 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -153,3 +153,37 @@ @article{Royston:2018 volume = {365}, year = {2018} } + +@ARTICLE{ppp:2021, + author={Pennycook, S. John and Sewall, Jason D. and Jacobsen, Douglas W. and Deakin, Tom and McIntosh-Smith, Simon}, + journal={Computing in Science \& Engineering}, + title={Navigating Performance, Portability, and Productivity}, + year={2021}, + volume={23}, + number={5}, + pages={28-38}, + keywords={Measurement;Productivity;Performance evaluation;Navigation;Harmonic analysis}, + doi={10.1109/MCSE.2021.3097276} + } + +@article{polypic, + title={A polynomial particle-in-cell method}, + author={Fu, Chuyuan and Guo, Qi and Gast, Theodore and Jiang, Chenfanfu and Teran, Joseph}, + journal={ACM Transactions on Graphics (TOG)}, + volume={36}, + number={6}, + pages={1--12}, + year={2017}, + publisher={ACM New York, NY, USA} +} + +@article{apic, + title={The affine particle-in-cell method}, + author={Jiang, Chenfanfu and Schroeder, Craig and Selle, Andrew and Teran, Joseph and Stomakhin, Alexey}, + journal={ACM Transactions on Graphics (TOG)}, + volume={34}, + number={4}, + pages={1--10}, + year={2015}, + publisher={ACM New York, NY, USA} +} diff --git a/paper/paper.md b/paper/paper.md index e5d4a04..944799e 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -29,32 +29,34 @@ bibliography: paper.bib # Summary Particle-in-cell (PIC) methods are used is disparate fields, from plasma -physics to solid mechanics and computer graphics. +physics and solid mechanics to computer graphics. `Picasso` is a performance portable library for PIC simulations, developed -throughout the Exascale Computing Project (ECP) [@ecp:2020]. `Picasso` builds +through the Exascale Computing Project (ECP) [@ecp:2020]. `Picasso` builds on the `Cabana` library [@cabana:2022] for particle and structured grid methods, which in turn extends the `Kokkos` library for on-node parallelism -across hardware architectures [@kokkos:2022] and `MPI` for scalable multi-node -communication. `Picasso` provides interpolation schemes for particle-to-grid +across hardware architectures [@kokkos:2022] and uses `MPI` for scalable distributed +simulation. `Picasso` provides interpolation schemes for particle-to-grid and grid-to-particle updates, embedded free surface tracking, and robust linear algebra support for particle and grid operations. This separation of concerns results in a parallelism layer, a simulation motif (and distributed -communication) layer, and finally a PIC algorithms layer, all enabling the -user-level physics. +communication) layer, and finally a PIC algorithm and utility layer, all enabling +performant user-level physics simulations. # Statement of need Computational predictions are increasingly necessary for answering scientific -and engineering questions. As these needs continue to expand, striking the -proper balance of computational performance, portability across hardware -architectures, and programmer productivity requires substantial effort. -While `Kokkos` provides general portability for parallel algorithms, many -particle-specific extensions are necessary in `Cabana` to improve the -productivity, both in capability and in achieving peak performance. -Even still, there are no known current libraries to support PIC algorithms -and related complex particle operations. +and engineering questions; PIC methods are a robust option for simulations of highly +dynamic systems, including plasmas, fluids, and solid materials undergoing severe +deformation. As these needs continue to expand, striking the proper balance of +computational performance, portability across hardware architectures, and programmer +productivity requires substantial effort [@ppp:2021]. +Existing frameworks for PIC simulations are increasingly written with performance +portability in mind, but focused much more on the plasma physics domain [@vpic, @ippl, @pumipic]. +In addition, there are no known current libraries to support state-of-the-art PIC algorithms +and related complex particle operations [@apic, @polypic]. # Library capabilities + `Picasso` introduces various classes and supporting types for enabling a high-level specification of field data and PIC algorithms, with the intent to hide implementation details such as memory spaces and allocation, halo communication patterns, and device versus host access. The design principles of performance transparency and PIC domain abstraction thus allows the algorithm designer/user implementor to focus only on the field data definitions and their interdependencies within various sequential "operations", allowing for a natural identification. `Picasso` defines two necessary types for facilitating data management and communication: the `FieldManager` and `GridOperator`'s, discussed in the following sections. At the core of `Picasso`'s domain model is the concept of a `FieldLayout`, which contains a location and a field tag. A location can be one of any structured mesh entity (`Node`, `Cell`, `Face`, `Edge`), where `D` is a specific dimension `I`,`J`, or `K`, or a `Particle`. A tag is any type with a string-like `label()` static member function. The complete specification of a `FieldLayout` uniquely defines a field as known to the `FieldManager`, while also allowing the same field tag (e.g. `Temperature`) to be associated with multiple locations (e.g. both `Node` and `Particle`). @@ -87,6 +89,11 @@ The `Picasso::LevelSet` and `Picasso::ParticleLevelSet` are fast parallel implem ## Examples and performance testing +## Future work + +Picasso also provides a clear place to include further algorithmic development in the PIC +field, e.g. @powerpic, @ipic. + # Acknowledgments This work was supported by the Exascale Computing Project (17-SC-20-SC), a From 677f09f33cb39ebb44f39b6c9207df540d32c52e Mon Sep 17 00:00:00 2001 From: "Chong, Kwitae" Date: Tue, 1 Oct 2024 23:30:56 -0400 Subject: [PATCH 08/11] add interpolation --- paper/paper.bib | 9 +++++++++ paper/paper.md | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/paper/paper.bib b/paper/paper.bib index 911f956..c8dc2d1 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -187,3 +187,12 @@ @article{apic year={2015}, publisher={ACM New York, NY, USA} } + +@article{fiip, + author = {J. Brackbill and H. Ruppel}, + title = {{FLIP}: A method for adaptively zoned, Particle-In-Cell calculations of fluid flows in two dimensions}, + journal = {J Comp Phys}, + volume = 65, + pages = {314--343}, + year = 1986 +} diff --git a/paper/paper.md b/paper/paper.md index 944799e..c610377 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -81,7 +81,7 @@ where the returned `C` is an NxN matrix, `~` is the transpose operator, and the `Picasso` provides an interface for various supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Field tags need only derive from these types in order to make use of `Picasso` linear algebra features. Although all basic operations on vectors and matrices are implemented, several specialized operations are also available, including matrix determinant, inverse, exponential, LU, and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation. ## Interpolation - +`Picasso` provides several interpolation methods such as `value`, 'gradient` and `divergence` on both `P2G` and `G2P` with appropriate spline data types such that `value` adopts `SplineValue` and `gradient` and `divergence` use `SplineGradient`. Those methods can be applied on both collocated/staggered nodes and cells upto thrid order spline function. Moreover, `Picasso` extends those methods to general interpolation schemes, Fluid-Implicit-Paritcle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic] and Polynomial Particle-In-Cell (PolyPIC) [@polypic] method between particle and grid. Each schemes are ditinguished depending on the mode of interpolating field variable. For example, in the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than velocity itself while APIC augments velocity information by adding additiong velocity gradient around particle. PolyPIC adds polynomial velocity mode in addition to APIC. ## Level Sets and Boundaries The `Picasso::LevelSet` and `Picasso::ParticleLevelSet` are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from [ArborX::BVH](https://github.com/arborx/ArborX) (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `Picasso::MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be serialized to disk in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. From 0f9165840287d300827c2ce55ec2093666c142a4 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Tue, 22 Oct 2024 14:39:41 -0400 Subject: [PATCH 09/11] Overall edits --- paper/paper.md | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index c610377..a4d3524 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -35,44 +35,45 @@ through the Exascale Computing Project (ECP) [@ecp:2020]. `Picasso` builds on the `Cabana` library [@cabana:2022] for particle and structured grid methods, which in turn extends the `Kokkos` library for on-node parallelism across hardware architectures [@kokkos:2022] and uses `MPI` for scalable distributed -simulation. `Picasso` provides interpolation schemes for particle-to-grid -and grid-to-particle updates, embedded free surface tracking, and robust +simulation. `Picasso` provides interpolation schemes for particle-to-grid (P2G) +and grid-to-particle (G2P) updates, embedded free surface tracking, and robust linear algebra support for particle and grid operations. This separation of concerns results in a parallelism layer, a simulation motif (and distributed -communication) layer, and finally a PIC algorithm and utility layer, all enabling -performant user-level physics simulations. +communication) layer, and finally a PIC algorithm and utility layer, together enabling +performant user-level PIC physics simulations. # Statement of need Computational predictions are increasingly necessary for answering scientific and engineering questions; PIC methods are a robust option for simulations of highly -dynamic systems, including plasmas, fluids, and solid materials undergoing severe +dynamic systems including plasmas, fluids, and solid materials undergoing severe deformation. As these needs continue to expand, striking the proper balance of computational performance, portability across hardware architectures, and programmer productivity requires substantial effort [@ppp:2021]. Existing frameworks for PIC simulations are increasingly written with performance -portability in mind, but focused much more on the plasma physics domain [@vpic, @ippl, @pumipic]. +portability in mind, but focused more often on the plasma physics domain [@vpic, @ippl, @pumipic]. In addition, there are no known current libraries to support state-of-the-art PIC algorithms -and related complex particle operations [@apic, @polypic]. +and related complex particle operations [@apic, @polypic] used for fluid and solid mechanics. # Library capabilities -`Picasso` introduces various classes and supporting types for enabling a high-level specification of field data and PIC algorithms, with the intent to hide implementation details such as memory spaces and allocation, halo communication patterns, and device versus host access. The design principles of performance transparency and PIC domain abstraction thus allows the algorithm designer/user implementor to focus only on the field data definitions and their interdependencies within various sequential "operations", allowing for a natural identification. `Picasso` defines two necessary types for facilitating data management and communication: the `FieldManager` and `GridOperator`'s, discussed in the following sections. +`Picasso` introduces various classes and supporting types for enabling a high-level specification of field data and PIC algorithms, with the intent to simplify implementation details including memory allocation, distributed communication patterns, and device versus host memory access. The design principles of performance transparency and PIC domain abstraction thus allows the algorithm designer and/or user to focus only on the field data definitions and their interdependencies within various sequential "operations", allowing for a natural identification. `Picasso` defines two necessary types for facilitating data management and communication: the `FieldManager` and `GridOperator`'s, discussed in the following sections. +## Field Management At the core of `Picasso`'s domain model is the concept of a `FieldLayout`, which contains a location and a field tag. A location can be one of any structured mesh entity (`Node`, `Cell`, `Face`, `Edge`), where `D` is a specific dimension `I`,`J`, or `K`, or a `Particle`. A tag is any type with a string-like `label()` static member function. The complete specification of a `FieldLayout` uniquely defines a field as known to the `FieldManager`, while also allowing the same field tag (e.g. `Temperature`) to be associated with multiple locations (e.g. both `Node` and `Particle`). -### Field Manager -The `Picasso::FieldManager` is a single collection wrapper type for storing unique handles to field layouts in an `unordered_map`. The `FieldManager` itself is separately initialized by passing it to each `GridOperator::setup()` function. -### Grid Operators -The `Picasso::GridOperator` encapsulates all of the field management work associated with parallel grid operations. Both particle-centric loops are supported (for operations like P2G and G2P) as well as entity centric loops for more traditional discrete grid operations. -A `GridOperator` is defined over a given mesh type and a user defines the fields which must be gathered on the grid to apply the operator and the fields which will be scattered on the grid after the operation is complete. All distributed parallel work is managed by these dependencies - the user needs to only write the body of the operator kernel to be applied at each data point (e.g. each particle or mesh entity). -The order of the dependency templates (if there are any dependencies) is defined above in the documentation for `GridOperatorDependencies`. +The `FieldManager` is a single collection wrapper type for storing unique handles to field layouts in an `unordered_map`. The `FieldManager` itself is separately initialized by passing it to each `GridOperator::setup()` function. -### Particle Initialization -`Picasso` provides two modes for initializing particles on a background grid: `InitUniform` and `InitRandom`. In addition to requiring parameters (particles per dimension for `InitUniform` or particles per cell for `InitRandom`), a user-defined predicate functor must also be provided that returns true or false based on certain criteria of the candidate particle, such as position. The design for a custom user-hook during the creation phase allows for flexible and arbitrary particle initialization. +## Particle and Grid Operations +The `GridOperator` encapsulates all of the field management work associated with parallel grid operations. Both particle-centric loops are supported (for operations like P2G and G2P) as well as grid entity-centric loops for more traditional discrete grid operations. +A `GridOperator` is defined over a entity type and the user defines which fields must be gathered prior to applying the operator, as well as the fields which must be scattered after the operation is complete. All distributed parallel work is managed by these data dependencies: the user need only write the body of the kernel to be applied at each data point (e.g. each particle or mesh entity). +The order of the dependency template types (if there are any) is defined above in the documentation for `GridOperatorDependencies`. -### Batched Linear Algebra -Matrix-vector and matrix-matrix operations are nearly ubiquitous in particle-to-grid and grid-to-particle mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations alleviates the consideration of additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: +## Field Initialization +`Picasso` provides two modes for initializing particles on a background grid: `Uniform` and `Random`. Both create particles per grid cell, but with differing inputs ( number of particles per dimension for `Uniform` or particles per cell for `Random`). In addition, a user-defined predicate functor must also be provided that returns true or false based on criteria for the candidate particle, e.g. position. The design for a custom user-hook during the creation phase allows for flexible and arbitrary particle initialization. All particle and grid field values can be initialized using existing `Kokkos` and `Cabana` loops. + +## Batched Linear Algebra +Matrix-vector and matrix-matrix operations are nearly ubiquitous in P2G and G2P mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations alleviates the consideration of additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: ```cpp auto C = 0.5 * (A + ~A) * B + (x * ~x); ``` @@ -81,18 +82,20 @@ where the returned `C` is an NxN matrix, `~` is the transpose operator, and the `Picasso` provides an interface for various supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Field tags need only derive from these types in order to make use of `Picasso` linear algebra features. Although all basic operations on vectors and matrices are implemented, several specialized operations are also available, including matrix determinant, inverse, exponential, LU, and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation. ## Interpolation -`Picasso` provides several interpolation methods such as `value`, 'gradient` and `divergence` on both `P2G` and `G2P` with appropriate spline data types such that `value` adopts `SplineValue` and `gradient` and `divergence` use `SplineGradient`. Those methods can be applied on both collocated/staggered nodes and cells upto thrid order spline function. Moreover, `Picasso` extends those methods to general interpolation schemes, Fluid-Implicit-Paritcle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic] and Polynomial Particle-In-Cell (PolyPIC) [@polypic] method between particle and grid. Each schemes are ditinguished depending on the mode of interpolating field variable. For example, in the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than velocity itself while APIC augments velocity information by adding additiong velocity gradient around particle. PolyPIC adds polynomial velocity mode in addition to APIC. +`Picasso` provides interpolation across quantites including `value`, `gradient` and `divergence` for both P2G and G2P. Spline data types are automatically determined such that `value` adopts `SplineValue` and `gradient` and `divergence` use `SplineGradient`, etc. Those methods can be applied on both collocated/staggered nodes and cells up to second order spline functions. Moreover, `Picasso` extends those methods to general interpolation schemes: Fluid-Implicit-Particle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic], and Polynomial Particle-In-Cell (PolyPIC) [@polypic] methods. Each scheme is disinguished by the mode of interpolating field variable. In the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than velocity itself, APIC augments velocity information by adding additional velocity gradients around the particle, and PolyPIC adds polynomial velocity modes in addition to APIC. -## Level Sets and Boundaries -The `Picasso::LevelSet` and `Picasso::ParticleLevelSet` are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from [ArborX::BVH](https://github.com/arborx/ArborX) (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `Picasso::MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be serialized to disk in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. +## Embedded free surfaces +The `LevelSet` and `ParticleLevelSet` are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from [ArborX::BVH](https://github.com/arborx/ArborX) (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be serialized to disk in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. -## Examples and performance testing +# Examples and performance testing +`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities. ## Future work Picasso also provides a clear place to include further algorithmic development in the PIC -field, e.g. @powerpic, @ipic. +field, e.g. @powerpic, @ipic. Extensions to Picasso for plasma physics and cosmology will require further integrations +with the Cabana library fast Fourier transform and structured solver interfaces for electric and magnetic field support. # Acknowledgments From e44da6b44fecbc7e070c068e3a6e4766252f59d9 Mon Sep 17 00:00:00 2001 From: "Chong, Kwitae" Date: Mon, 28 Oct 2024 17:22:21 -0400 Subject: [PATCH 10/11] add dam break example description --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index a4d3524..5479bf0 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -89,7 +89,7 @@ The `LevelSet` and `ParticleLevelSet` are fast parallel implementations of grid- # Examples and performance testing -`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities. +`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities. In the dam break example, a block of water are discretized with particles in the corner of walls. Once the membrane of water breaks, water collapse due to the gravity and shows splash and free surface wave. In each steps, P2G interpolates mass and momentum from particles to grid and velocity on grid is updated by solving Euler equation in which pressure term is implemented with equation of state. After no-flow through boundary condition is enforced the position and velocity of particles are newly updated with G2P. ## Future work From 59db5078f3c0d0f64ac758993b1f97a7b87dc903 Mon Sep 17 00:00:00 2001 From: Sam Reeve <6740307+streeve@users.noreply.github.com> Date: Tue, 29 Oct 2024 11:51:49 -0400 Subject: [PATCH 11/11] More updates --- paper/paper.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 5479bf0..12898b2 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -36,7 +36,7 @@ on the `Cabana` library [@cabana:2022] for particle and structured grid methods, which in turn extends the `Kokkos` library for on-node parallelism across hardware architectures [@kokkos:2022] and uses `MPI` for scalable distributed simulation. `Picasso` provides interpolation schemes for particle-to-grid (P2G) -and grid-to-particle (G2P) updates, embedded free surface tracking, and robust +and grid-to-particle (G2P) updates, embedded free surface tracking built on `ArborX` geometric search [@arborx:2020], and robust linear algebra support for particle and grid operations. This separation of concerns results in a parallelism layer, a simulation motif (and distributed communication) layer, and finally a PIC algorithm and utility layer, together enabling @@ -53,7 +53,7 @@ productivity requires substantial effort [@ppp:2021]. Existing frameworks for PIC simulations are increasingly written with performance portability in mind, but focused more often on the plasma physics domain [@vpic, @ippl, @pumipic]. In addition, there are no known current libraries to support state-of-the-art PIC algorithms -and related complex particle operations [@apic, @polypic] used for fluid and solid mechanics. +and related complex particle operations [@apic, @polypic] used for fluid and solid mechanics PIC and material point method (MPM) simulations. # Library capabilities @@ -73,29 +73,30 @@ The order of the dependency template types (if there are any) is defined above i `Picasso` provides two modes for initializing particles on a background grid: `Uniform` and `Random`. Both create particles per grid cell, but with differing inputs ( number of particles per dimension for `Uniform` or particles per cell for `Random`). In addition, a user-defined predicate functor must also be provided that returns true or false based on criteria for the candidate particle, e.g. position. The design for a custom user-hook during the creation phase allows for flexible and arbitrary particle initialization. All particle and grid field values can be initialized using existing `Kokkos` and `Cabana` loops. ## Batched Linear Algebra -Matrix-vector and matrix-matrix operations are nearly ubiquitous in P2G and G2P mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a corresponding `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations alleviates the consideration of additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: +Matrix-vector and matrix-matrix operations are ubiquitous in P2G and G2P mappings, function space projections, and other common operations wherein support for writing concise vectorial expressions is benefitial for both code readability, as well as exposing algorithmic parallelism. The `Picasso` library also implements kernel-level dense linear algebra operations in a the `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures to hold intermediates for eager evaluations when necessary. The general concept in the `Picasso` implementation for lazy vs. eager evaluations alleviates the consideration of additional performance factors such as overhead incurred from excessive copies or total operation counts in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that are a mixture of both eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N: ```cpp auto C = 0.5 * (A + ~A) * B + (x * ~x); ``` where the returned `C` is an NxN matrix, `~` is the transpose operator, and the `*` is an outer-product operation in the last term. -`Picasso` provides an interface for various supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Field tags need only derive from these types in order to make use of `Picasso` linear algebra features. Although all basic operations on vectors and matrices are implemented, several specialized operations are also available, including matrix determinant, inverse, exponential, LU, and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation. +`Picasso` provides an interface for various supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Particle and grid field tags need only derive from these base types in order to make use of `Picasso` linear algebra features. Although all basic operations on vectors and matrices are implemented, several specialized operations are also available, including matrix determinant, inverse, exponential, LU, and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation. ## Interpolation -`Picasso` provides interpolation across quantites including `value`, `gradient` and `divergence` for both P2G and G2P. Spline data types are automatically determined such that `value` adopts `SplineValue` and `gradient` and `divergence` use `SplineGradient`, etc. Those methods can be applied on both collocated/staggered nodes and cells up to second order spline functions. Moreover, `Picasso` extends those methods to general interpolation schemes: Fluid-Implicit-Particle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic], and Polynomial Particle-In-Cell (PolyPIC) [@polypic] methods. Each scheme is disinguished by the mode of interpolating field variable. In the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than velocity itself, APIC augments velocity information by adding additional velocity gradients around the particle, and PolyPIC adds polynomial velocity modes in addition to APIC. +`Picasso` provides P2G and G2P interpolation building upon the `Cabana` structured grid support including `value`, `gradient` and `divergence`. Spline data types are automatically determined such that `value` adopts `SplineValue` and `gradient` and `divergence` use `SplineGradient`, etc. Those methods can be applied on both collocated or staggered cell entities up to second order spline functions. In particular, `Picasso` extends the basic interpolation to general interpolation schemes: Fluid-Implicit-Particle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic], and Polynomial Particle-In-Cell (PolyPIC) [@polypic] methods. Each scheme is disinguished by the mode of interpolating field variable. In the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than velocity itself, APIC augments velocity information by adding additional velocity gradients around the particle, and PolyPIC adds polynomial velocity modes in addition to APIC. ## Embedded free surfaces -The `LevelSet` and `ParticleLevelSet` are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from [ArborX::BVH](https://github.com/arborx/ArborX) (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be serialized to disk in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. +The `LevelSet` and `ParticleLevelSet` classes are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from the `ArborX::BVH` (bounding volume hiererarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as proxy. A parallel Hopf-Lax redistancing algorithm is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be output in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms. # Examples and performance testing -`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities. In the dam break example, a block of water are discretized with particles in the corner of walls. Once the membrane of water breaks, water collapse due to the gravity and shows splash and free surface wave. In each steps, P2G interpolates mass and momentum from particles to grid and velocity on grid is updated by solving Euler equation in which pressure term is implemented with equation of state. After no-flow through boundary condition is enforced the position and velocity of particles are newly updated with G2P. +`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities and intended library usage. An initially static column of water is created in one quarter of the domain and allowed to fall under gravity and splash within the system using no-flow-through boundary conditions. In each step, P2G interpolates mass and momentum from particles to the grid, velocity on the grid is updated including a pressure term implemented with an equation of state, and finally the position and velocity of the particles are updated with G2P. +The conservation of total energy is used to exemplify the tradeoff of accuracy with the computational cost of each interpolation scheme in Figure XXX. ## Future work -Picasso also provides a clear place to include further algorithmic development in the PIC -field, e.g. @powerpic, @ipic. Extensions to Picasso for plasma physics and cosmology will require further integrations -with the Cabana library fast Fourier transform and structured solver interfaces for electric and magnetic field support. +`Picasso` expands the `Kokkos`-based library ecosystem for PIC, MPM, and related hybrid particle-grid methods. +`Picasso` therefore provides a clear place to include further algorithmic PIC development, e.g. @powerpic and @ipic. Extensions to Picasso for plasma physics and cosmology will require further integrations +with the existing `Cabana` library fast Fourier transform and structured solver interfaces or compatibility layers with other PIC libraries IPPL or PUMIPic. Finally, as is appropriate `Picasso` development which is useful across all particle methods will be moved upstream to the `Cabana` library and similarly generally useful PIC capabilities which originate in physics applications will be moved into `Picasso`. # Acknowledgments