diff --git a/paper/paper.bib b/paper/paper.bib
new file mode 100644
index 0000000..c8dc2d1
--- /dev/null
+++ b/paper/paper.bib
@@ -0,0 +1,198 @@
+@article{copa:2021,
+  author = {Susan M Mniszewski and James Belak and Jean-Luc Fattebert and Christian FA Negre and Stuart R Slattery and Adetokunbo A Adedoyin and Robert F Bird and Choongseok Chang and Guangye Chen and Stéphane Ethier and Shane Fogerty and Salman Habib and Christoph Junghans and Damien Lebrun-Grandié and Jamaludin Mohd-Yusof and Stan G Moore and Daniel Osei-Kuffuor and Steven J Plimpton and Adrian Pope and Samuel Temple Reeve and Lee Ricketson and Aaron Scheinberg and Amil Y Sharma and Michael E Wall},
+  title = {Enabling particle applications for exascale computing platforms},
+  journal = {The International Journal of High Performance Computing Applications},
+  volume = {0},
+  number = {0},
+  pages = {10943420211022829},
+  year = {2021},
+  doi = {10.1177/10943420211022829}
+}
+
+@article{cabana:2022,
+  doi = {10.21105/joss.04115},
+  url = {https://doi.org/10.21105/joss.04115},
+  year = {2022},
+  publisher = {The Open Journal},
+  volume = {7},
+  number = {72},
+  pages = {4115},
+  author = {Stuart Slattery and Samuel Temple Reeve and Christoph Junghans and Damien Lebrun-Grandié and Robert Bird and Guangye Chen and Shane Fogerty and Yuxing Qiu and Stephan Schulz and Aaron Scheinberg and Austin Isner and Kwitae Chong and Stan Moore and Timothy Germann and James Belak and Susan Mniszewski},
+  title = {Cabana: A Performance Portable Library for Particle-Based Simulations},
+  journal = {Journal of Open Source Software}
+}
+
+@article{kokkos:2022,
+  title = {Kokkos 3: {Programming} {Model} {Extensions} for the {Exascale} {Era}},
+  volume = {33},
+  issn = {1558-2183},
+  shorttitle = {Kokkos 3},
+  doi = {10.1109/TPDS.2021.3097283},
+  number = {4},
+  journal = {IEEE Transactions on Parallel and Distributed Systems},
+  author = {Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah},
+  month = apr,
+  year = {2022},
+  note = {Conference Name: IEEE Transactions on Parallel and Distributed Systems},
+  keywords = {Benchmark testing, exascale, Graphics processing units, Hardware, heterogeneous computing, high-performance computing, Kernel, Laboratories, Layout, Performance portability, Programming, programming models},
+  pages = {805--817}
+}
+
+@book{hockney,
+  address = {Bristol, England; Philadelphia},
+  edition = {1st edition},
+  title = {Computer {Simulation} {Using} {Particles}},
+  language = {English},
+  publisher = {CRC Press},
+  author = {Hockney, R. W. and Eastwood, J. W.},
+  month = jan,
+  year = {1989},
+  isbn = {978-0-85274-392-8}
+}
+
+@article{ecp:2020,
+  title = {Exascale applications: skin in the game},
+  volume = {378},
+  shorttitle = {Exascale applications},
+  number = {2166},
+  journal = {Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences},
+  author = {Alexander, Francis and Almgren, Ann and Bell, John and Bhattacharjee, Amitava and Chen, Jacqueline and Colella, Phil and Daniel, David and DeSlippe, Jack and Diachin, Lori and Draeger, Erik and Dubey, Anshu and Dunning, Thom and Evans, Thomas and Foster, Ian and Francois, Marianne and Germann, Tim and Gordon, Mark and Habib, Salman and Halappanavar, Mahantesh and Hamilton, Steven and Hart, William and (Henry) Huang, Zhenyu and Hungerford, Aimee and Kasen, Daniel and Kent, Paul R. C. and Kolev, Tzanio and Kothe, Douglas B. and Kronfeld, Andreas and Luo, Ye and Mackenzie, Paul and McCallen, David and Messer, Bronson and Mniszewski, Sue and Oehmen, Chris and Perazzo, Amedeo and Perez, Danny and Richards, David and Rider, William J. and Rieben, Rob and Roche, Kenneth and Siegel, Andrew and Sprague, Michael and Steefel, Carl and Stevens, Rick and Syamlal, Madhava and Taylor, Mark and Turner, John and Vay, Jean-Luc and Voter, Artur F. and Windus, Theresa L. and Yelick, Katherine},
+  month = mar,
+  year = {2020},
+  pages = {20190056},
+  doi = {10.1098/rsta.2019.0056}
+}
+
+@article{arborx:2020,
+  title = {ArborX: A Performance Portable Geometric Search Library},
+  author = {Lebrun-Grandié, Damien and Prokopenko, Andrey and Turcksin, Bruno and Slattery, Stuart R},
+  journal = {ACM Transactions on Mathematical Software (TOMS)},
+  volume = {47},
+  number = {1},
+  pages = {1--15},
+  year = {2020},
+  publisher = {ACM New York, NY, USA},
+  doi = {10.1145/3412558}
+}
+
+@article{openfpm,
+  title = {OpenFPM: A scalable open framework for particle and particle-mesh codes on parallel computers},
+  author = {Incardona, Pietro and Leo, Antonio and Zaluzhnyi, Yaroslav and Ramaswamy, Rajesh and Sbalzarini, Ivo F},
+  journal = {Computer Physics Communications},
+  volume = {241},
+  pages = {155--177},
+  year = {2019},
+  publisher = {Elsevier},
+  doi = {10.1016/j.cpc.2019.03.007}
+}
+
+@inproceedings{ippl,
+  title = {Scaling and performance portability of the particle-in-cell scheme for plasma physics applications through mini-apps targeting exascale architectures},
+  author = {Muralikrishnan, Sriramkrishnan and Frey, Matthias and Vinciguerra, Alessandro and Ligotino, Michael and Cerfon, Antoine J and Stoyanov, Miroslav and Gayatri, Rahulkumar and Adelmann, Andreas},
+  booktitle = {Proceedings of the 2024 SIAM Conference on Parallel Processing for Scientific Computing (PP)},
+  pages = {26--38},
+  year = {2024},
+  organization = {SIAM}
+}
+
+@article{vpic,
+  author = {Bird, Robert and Tan, Nigel and Luedtke, Scott V. and Harrell, Stephen Lien and Taufer, Michela and Albright, Brian},
+  journal = {IEEE Transactions on Parallel and Distributed Systems},
+  title = {VPIC 2.0: Next Generation Particle-in-Cell Simulations},
+  year = {2022},
+  volume = {33},
+  number = {4},
+  pages = {952--963},
+  keywords = {Plasmas;Hardware;Physics;Libraries;Mathematical model;Shape;Layout;Simulation;portability;plasma physics;particle-in-cell},
+  doi = {10.1109/TPDS.2021.3084795}
+}
+
+@article{pumipic,
+  title = {PUMIPic: A mesh-based approach to unstructured mesh Particle-In-Cell on GPUs},
+  journal = {Journal of Parallel and Distributed Computing},
+  volume = {157},
+  pages = {1--12},
+  year = {2021},
+  issn = {0743-7315},
+  doi = {10.1016/j.jpdc.2021.06.004},
+  url = {https://www.sciencedirect.com/science/article/pii/S0743731521001337},
+  author = {Gerrett Diamond and Cameron W. Smith and Chonglin Zhang and Eisung Yoon and Mark S. Shephard},
+  keywords = {Particle-in-cell, Unstructured mesh, GPU}
+}
+
+@article{powerpic,
+  title = {The power particle-in-cell method},
+  author = {Qu, Ziyin and Li, Minchen and De Goes, Fernando and Jiang, Chenfanfu},
+  journal = {ACM Transactions on Graphics},
+  volume = {41},
+  number = {4},
+  year = {2022}
+}
+
+@inproceedings{ipic,
+  title = {The Impulse Particle-In-Cell Method},
+  author = {Sancho, Sergio and Tang, Jingwei and Batty, Christopher and Azevedo, Vinicius C},
+  booktitle = {Computer Graphics Forum},
+  volume = {43},
+  number = {2},
+  pages = {e15022},
+  year = {2024},
+  organization = {Wiley Online Library}
+}
+
+@article{Royston:2018,
+  author = {Michael Royston and Andre Pradhana and Byungjoon Lee and Yat Tin Chow and Wotao Yin and Joseph Teran and Stanley Osher},
+  doi = {10.1016/j.jcp.2018.01.035},
+  issn = {0021-9991},
+  journal = {Journal of Computational Physics},
+  keywords = {Level set methods, Eikonal equation, Hamilton Jacobi, Hopf--Lax},
+  pages = {7--17},
+  title = {Parallel redistancing using the {Hopf--Lax} formula},
+  url = {https://www.sciencedirect.com/science/article/pii/S0021999118300457},
+  volume = {365},
+  year = {2018}
+}
+
+@article{ppp:2021,
+  author = {Pennycook, S. John and Sewall, Jason D. and Jacobsen, Douglas W. and Deakin, Tom and McIntosh-Smith, Simon},
+  journal = {Computing in Science \& Engineering},
+  title = {Navigating Performance, Portability, and Productivity},
+  year = {2021},
+  volume = {23},
+  number = {5},
+  pages = {28--38},
+  keywords = {Measurement;Productivity;Performance evaluation;Navigation;Harmonic analysis},
+  doi = {10.1109/MCSE.2021.3097276}
+}
+
+@article{polypic,
+  title = {A polynomial particle-in-cell method},
+  author = {Fu, Chuyuan and Guo, Qi and Gast, Theodore and Jiang, Chenfanfu and Teran, Joseph},
+  journal = {ACM Transactions on Graphics (TOG)},
+  volume = {36},
+  number = {6},
+  pages = {1--12},
+  year = {2017},
+  publisher = {ACM New York, NY, USA}
+}
+
+@article{apic,
+  title = {The affine particle-in-cell method},
+  author = {Jiang, Chenfanfu and Schroeder, Craig and Selle, Andrew and Teran, Joseph and Stomakhin, Alexey},
+  journal = {ACM Transactions on Graphics (TOG)},
+  volume = {34},
+  number = {4},
+  pages = {1--10},
+  year = {2015},
+  publisher = {ACM New York, NY, USA}
+}
+
+@article{flip,
+  author = {J. Brackbill and H. Ruppel},
+  title = {{FLIP}: A method for adaptively zoned, Particle-In-Cell calculations of fluid flows in two dimensions},
+  journal = {Journal of Computational Physics},
+  volume = {65},
+  pages = {314--343},
+  year = {1986}
+}
diff --git a/paper/paper.md b/paper/paper.md
new file mode 100644
index 0000000..12898b2
--- /dev/null
+++ b/paper/paper.md
@@ -0,0 +1,118 @@
+---
+title: 'Picasso: Performance Portable Particle-in-Cell'
+tags:
+  - C++
+  - Kokkos
+  - Cabana
+  - particle-in-cell
+  - material point method
+authors:
+  - name: Stuart Slattery^[corresponding author]
+    orcid: 0000-0003-0103-888X
+    affiliation: 1
+  - name: Samuel Temple Reeve
+    orcid: 0000-0002-4250-9476
+    affiliation: 1
+  - name: Austin Isner
+    affiliation: 1
+  - name: Kwitae Chong
+    affiliation: 1
+  - name: Lance Bullerwell
+    affiliation: 1
+affiliations:
+  - name: Oak Ridge National Laboratory, Oak Ridge, TN, USA
+    index: 1
+date: 12 April 2024
+bibliography: paper.bib
+---
+
+# Summary
+
+Particle-in-cell (PIC) methods are used in disparate fields, from plasma
+physics and solid mechanics to computer graphics [@hockney].
+`Picasso` is a performance portable library for PIC simulations, developed
+through the Exascale Computing Project (ECP) [@ecp:2020; @copa:2021]. `Picasso` builds
+on the `Cabana` library [@cabana:2022] for particle and structured grid
+methods, which in turn extends the `Kokkos` library for on-node parallelism
+across hardware architectures [@kokkos:2022] and uses `MPI` for scalable
+distributed simulation. `Picasso` provides interpolation schemes for
+particle-to-grid (P2G) and grid-to-particle (G2P) updates, embedded free
+surface tracking built on `ArborX` geometric search [@arborx:2020], and robust
+linear algebra support for particle and grid operations. This separation of
+concerns results in a parallelism layer, a simulation motif (and distributed
+communication) layer, and finally a PIC algorithm and utility layer, which
+together enable performant user-level PIC physics simulations.
+
+# Statement of need
+
+Computational predictions are increasingly necessary for answering scientific
+and engineering questions; PIC methods are a robust option for simulations of highly
+dynamic systems including plasmas, fluids, and solid materials undergoing severe
+deformation. As these needs continue to expand, striking the proper balance of
+computational performance, portability across hardware architectures, and programmer
+productivity requires substantial effort [@ppp:2021].
+Existing frameworks for PIC simulations are increasingly written with performance
+portability in mind [@openfpm], but most often focus on the plasma physics
+domain [@vpic; @ippl; @pumipic].
+In addition, no current libraries are known to support the state-of-the-art PIC
+algorithms and related complex particle operations [@apic; @polypic] used in fluid
+and solid mechanics PIC and material point method (MPM) simulations.
+
+# Library capabilities
+
+`Picasso` introduces various classes and supporting types that enable a high-level specification of field data and PIC algorithms, with the intent of abstracting away implementation details including memory allocation, distributed communication patterns, and device versus host memory access. The design principles of performance transparency and PIC domain abstraction thus allow the algorithm designer and/or user to focus only on the field data definitions and their interdependencies within various sequential "operations", allowing for a natural identification of the data dependencies between them. `Picasso` defines two types that facilitate this data management and communication, the `FieldManager` and the `GridOperator`, discussed in the following sections.
+
+## Field Management
+At the core of `Picasso`'s domain model is the concept of a `FieldLayout`, which combines a location and a field tag. A location can be any structured mesh entity (`Node`, `Cell`, `Face<D>`, or `Edge<D>`, where `D` is a specific dimension `I`, `J`, or `K`) or a `Particle`. A tag is any type with a string-like `label()` static member function. The complete specification of a `FieldLayout` uniquely defines a field as known to the `FieldManager`, while also allowing the same field tag (e.g. `Temperature`) to be associated with multiple locations (e.g. both `Node` and `Particle`), as sketched below.
+
+The `FieldManager` is a single collection wrapper type that stores unique handles to field layouts in an `unordered_map`. The `FieldManager` itself is initialized separately by passing it to each `GridOperator::setup()` function.
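+
+The following sketch illustrates the tag and layout concepts with self-contained stand-in types; the names and namespaces are illustrative placeholders rather than the verbatim `Picasso` API:
+```cpp
+#include <iostream>
+#include <string>
+
+// Stand-in for the field base types described above (in Picasso these are
+// provided by the library in FieldTypes).
+template <class T> struct ScalarBase { using value_type = T; };
+
+// A field tag is any type with a string-like static label() member function.
+struct Temperature : ScalarBase<double>
+{
+    static std::string label() { return "temperature"; }
+};
+
+// A layout pairs a location with a field tag; the same tag may be reused at
+// multiple locations (e.g. both nodes and particles).
+struct Node {};
+struct Particle {};
+template <class Location, class Tag> struct FieldLayout {};
+
+using NodeTemperature = FieldLayout<Node, Temperature>;
+using ParticleTemperature = FieldLayout<Particle, Temperature>;
+
+int main()
+{
+    // The label is the kind of key a FieldManager map would store.
+    std::cout << Temperature::label() << std::endl;
+}
+```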
+
+## Particle and Grid Operations
+The `GridOperator` encapsulates all of the field management work associated with parallel grid operations. Both particle-centric loops (for operations like P2G and G2P) and grid entity-centric loops (for more traditional discrete grid operations) are supported.
+A `GridOperator` is defined over an entity type, and the user declares which fields must be gathered before the operator is applied, as well as which fields must be scattered after the operation is complete. All distributed parallel work is managed by these data dependencies: the user need only write the body of the kernel to be applied at each data point (e.g. each particle or mesh entity), as illustrated below.
+The order of the dependency template types (if there are any) is defined in the library documentation for `GridOperatorDependencies`.
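+
+As a rough analogue of a grid entity-centric operation written in plain `Kokkos` (this is not the `GridOperator` interface itself, and the field name is hypothetical), the user-level work reduces to a per-entity kernel body:
+```cpp
+#include <Kokkos_Core.hpp>
+
+int main(int argc, char* argv[])
+{
+    Kokkos::initialize(argc, argv);
+    {
+        // A small node-centered scalar field; in Picasso the FieldManager
+        // would own this allocation behind a field layout.
+        const int nx = 16, ny = 16;
+        Kokkos::View<double**> temperature("temperature", nx, ny);
+
+        // Only this kernel body is user-level work; the gather of fields
+        // read here and the scatter of fields written here would be derived
+        // from the declared operator dependencies.
+        Kokkos::parallel_for(
+            "grid_operation",
+            Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {nx, ny}),
+            KOKKOS_LAMBDA(const int i, const int j) {
+                temperature(i, j) = 300.0 + 0.5 * i + 0.25 * j;
+            });
+    }
+    Kokkos::finalize();
+    return 0;
+}
+```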
+
+## Field Initialization
+`Picasso` provides two modes for initializing particles on a background grid: `Uniform` and `Random`. Both create particles per grid cell, but with differing inputs (the number of particles per dimension for `Uniform` or the number of particles per cell for `Random`). In addition, a user-defined predicate functor must be provided that returns true or false based on criteria for the candidate particle, e.g. its position. This custom user hook during the creation phase allows for flexible and arbitrary particle initialization. All particle and grid field values can be initialized using existing `Kokkos` and `Cabana` loops.
+
+## Batched Linear Algebra
+Matrix-vector and matrix-matrix operations are ubiquitous in P2G and G2P mappings, function space projections, and other common operations, wherein support for writing concise vectorial expressions is beneficial both for code readability and for exposing algorithmic parallelism. The `Picasso` library implements kernel-level dense linear algebra operations in the `LinearAlgebra` namespace using a combination of expression templates for lazy evaluation and data structures that hold intermediates for eager evaluation when necessary. This mixture of lazy and eager evaluation relieves the user of additional performance considerations, such as the overhead incurred from excessive copies or the total operation count in otherwise unoptimized code. Using built-in operator overloading with support for expression templates, users can build complex nested tensorial expressions that combine eager and lazy evaluations. For example, if `A` and `B` are NxN and `x` is length N:
+```cpp
+auto C = 0.5 * (A + ~A) * B + (x * ~x);
+```
+where the returned `C` is an NxN matrix, `~` is the transpose operator, and the `*` in the last term is an outer product.
+
+`Picasso` provides an interface for the supported linear algebra types defined in `FieldTypes`: `Scalar`, `Vector`, and `Matrix`, as well as specialized support for higher-rank `Tensor3`, `Tensor4`, and `Quaternion` types. Particle and grid field tags need only derive from these base types in order to use the `Picasso` linear algebra features. In addition to all basic operations on vectors and matrices, several specialized operations are available, including matrix determinant, inverse, and exponential, LU and SVD decompositions, higher-order tensor contractions, and quaternion-matrix conjugation.
+
+## Interpolation
+`Picasso` provides P2G and G2P interpolation building upon the `Cabana` structured grid support, including `value`, `gradient`, and `divergence` operations. Spline data types are determined automatically, such that `value` adopts `SplineValue` while `gradient` and `divergence` use `SplineGradient`, etc. These methods can be applied to both collocated and staggered cell entities with spline functions up to second order. In particular, `Picasso` extends the basic interpolation to general interpolation schemes: the Fluid-Implicit-Particle (FLIP) [@flip], Affine Particle-In-Cell (APIC) [@apic], and Polynomial Particle-In-Cell (PolyPIC) [@polypic] methods. Each scheme is distinguished by how it interpolates the field variable. In the case of momentum interpolation, FLIP transfers the increment of velocity on the grid rather than the velocity itself, APIC augments the velocity information with additional velocity gradients around the particle, and PolyPIC adds polynomial velocity modes beyond APIC. The spline-based transfer underlying all of these schemes is sketched below.
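+
+The following self-contained 1D example shows the quadratic B-spline gather that a G2P `value` operation performs; it is a schematic of the interpolation mathematics, not the `Picasso` or `Cabana` API:
+```cpp
+#include <array>
+#include <cmath>
+#include <cstdio>
+#include <vector>
+
+int main()
+{
+    const double dx = 0.1;          // uniform cell size
+    std::vector<double> grid_v(32); // node-centered grid field
+    for (std::size_t i = 0; i < grid_v.size(); ++i)
+        grid_v[i] = std::sin(0.5 * i * dx);
+
+    const double xp = 1.234;        // particle position
+    // Nearest node index and normalized offset r in [-0.5, 0.5).
+    const int i0 = static_cast<int>(std::floor(xp / dx + 0.5));
+    const double r = xp / dx - i0;
+
+    // Quadratic B-spline weights over the 3-node stencil {i0-1, i0, i0+1};
+    // the weights sum to one (partition of unity).
+    const std::array<double, 3> w = { 0.5 * (0.5 - r) * (0.5 - r),
+                                      0.75 - r * r,
+                                      0.5 * (0.5 + r) * (0.5 + r) };
+
+    // G2P value: gather the weighted grid values to the particle.
+    double vp = 0.0;
+    for (int k = -1; k <= 1; ++k)
+        vp += w[k + 1] * grid_v[i0 + k];
+
+    std::printf("interpolated value at x = %g: %g\n", xp, vp);
+    return 0;
+}
+```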
+
+## Embedded free surfaces
+The `LevelSet` and `ParticleLevelSet` classes are fast parallel implementations of grid-based signed distance fields (SDF), which are relevant to level-set based approaches in interface reconstruction and boundary tracking. In particular, the `ParticleLevelSet` class relies on functionality from the `ArborX::BVH` (bounding volume hierarchy) for nearest point queries to compute a level set signed distance estimate, $\phi_i^{0}$, on the mesh using analytic spherical particle level sets $\phi_p$ as a proxy. A parallel Hopf-Lax redistancing algorithm [@Royston:2018] is also implemented that properly reinitializes the level set estimate $\phi$ into a final SDF ($|\nabla\phi_i|=1$). Additionally, the `MarchingCubes` namespace provides methods and data structures for triangulating the resulting SDF. The computed mesh facets may be output in STL format or used for further processing, for instance in convex hull initialization or embedded free surface tracking algorithms.
+
+# Examples and performance testing
+
+`Picasso` currently implements the dam break problem as a demonstration of fluid mechanics capabilities and intended library usage. An initially static column of water is created in one quarter of the domain and allowed to fall under gravity and splash within the system using no-flow-through boundary conditions. In each step, P2G interpolates mass and momentum from the particles to the grid, the grid velocity is updated including a pressure term implemented with an equation of state, and finally the position and velocity of the particles are updated with G2P.
+
+The conservation of total energy is used to exemplify the tradeoff between the accuracy and the computational cost of each interpolation scheme in Figure XXX.
+
+## Future work
+
+`Picasso` expands the `Kokkos`-based library ecosystem for PIC, MPM, and related hybrid particle-grid methods.
+`Picasso` therefore provides a clear place to include further algorithmic PIC development, e.g. the power PIC [@powerpic] and impulse PIC [@ipic] methods. Extensions of `Picasso` for plasma physics and cosmology will require further integration
+with the existing `Cabana` library fast Fourier transform and structured solver interfaces, or compatibility layers with other PIC libraries such as IPPL or PUMIPic. Finally, where appropriate, `Picasso` development that is useful across all particle methods will be moved upstream to the `Cabana` library; similarly, generally useful PIC capabilities that originate in physics applications will be moved into `Picasso`.
+
+# Acknowledgments
+
+This work was supported by the Exascale Computing Project (17-SC-20-SC), a
+collaborative effort of the U.S. DOE Office of Science and the NNSA.
+
+This manuscript has been authored by UT-Battelle, LLC under Contract No.
+DE-AC05-00OR22725 with the U.S. Department of Energy (DOE). The publisher, by
+accepting the article for publication, acknowledges that the United States
+Government retains a non-exclusive, paid-up, irrevocable, world-wide license to
+publish or reproduce the published form of this manuscript, or allow others to
+do so, for United States Government purposes. The DOE will provide public
+access to these results of federally sponsored research in accordance with the
+DOE Public Access Plan.
+
+This research used resources of the Oak Ridge Leadership Computing Facility
+(OLCF), supported by DOE under contract DE-AC05-00OR22725.
+
+# References