From e1ffec817055e2f469b04e5c8ada634d14e948e5 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 12 Jun 2024 14:26:25 -0400
Subject: [PATCH 01/46] add netket to use vmap_chunked, change the vectorize
 calls in objective jvp_scaled to use custom vectorize

---
 desc/backend.py                   | 140 ++++++++++++++++++++++++++++++
 desc/objectives/objective_funs.py |  19 ++--
 requirements.txt                  |   1 +
 3 files changed, 155 insertions(+), 5 deletions(-)

diff --git a/desc/backend.py b/desc/backend.py
index 77cf6f090d..c996925f5f 100644
--- a/desc/backend.py
+++ b/desc/backend.py
@@ -1,5 +1,6 @@
 """Backend functions for DESC, with options for JAX or regular numpy."""
 
+import functools
 import os
 import warnings
 
@@ -369,6 +370,145 @@ def tangent_solve(g, y):
         )
         return x, (jnp.linalg.norm(res), niter)
 
+    from jax._src.numpy.vectorize import (
+        _apply_excluded,
+        _check_output_dims,
+        _parse_gufunc_signature,
+        _parse_input_dimensions,
+    )
+    from netket.jax import vmap_chunked
+
+    def batched_vectorize(
+        pyfunc, *, excluded=frozenset(), signature=None, chunk_size=12
+    ):
+        """Define a vectorized function with broadcasting and batching.
+
+        below is taken from JAX
+        FIXME: change restof docstring
+        :func:`vectorize` is a convenience wrapper for defining vectorized
+        functions with broadcasting, in the style of NumPy's
+        `generalized universal functions
+        <https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html>`_.
+        It allows for defining functions that are automatically repeated across
+        any leading dimensions, without the implementation of the function needing to
+        be concerned about how to handle higher dimensional inputs.
+
+        :func:`jax.numpy.vectorize` has the same interface as
+        :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching
+        transformation (:func:`vmap`) rather than a Python loop. This should be
+        considerably more efficient, but the implementation must be written in terms
+        of functions that act on JAX arrays.
+
+        Args
+        ----
+            pyfunc: function to vectorize.
+            excluded: optional set of integers representing positional arguments for
+            which the function will not be vectorized. These will be passed directly
+            to ``pyfunc`` unmodified.
+            signature: optional generalized universal function signature, e.g.,
+            ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
+            provided, ``pyfunc`` will be called with (and expected to return) arrays
+            with shapes given by the size of corresponding core dimensions. By
+            default, pyfunc is assumed to take scalars arrays as input and output.
+            chunk_size: the size of the batches to pass to vmap. if 1, will only
+
+        Returns
+        -------
+            Vectorized version of the given function.
+
+        """
+        if any(not isinstance(exclude, (str, int)) for exclude in excluded):
+            raise TypeError(
+                "jax.numpy.vectorize can only exclude integer or string arguments, "
+                "but excluded={!r}".format(excluded)
+            )
+        if any(isinstance(e, int) and e < 0 for e in excluded):
+            raise ValueError(f"excluded={excluded!r} contains negative numbers")
+
+        @functools.wraps(pyfunc)
+        def wrapped(*args, **kwargs):
+            error_context = (
+                "on vectorized function with excluded={!r} and "
+                "signature={!r}".format(excluded, signature)
+            )
+            excluded_func, args, kwargs = _apply_excluded(
+                pyfunc, excluded, args, kwargs
+            )
+
+            if signature is not None:
+                input_core_dims, output_core_dims = _parse_gufunc_signature(signature)
+            else:
+                input_core_dims = [()] * len(args)
+                output_core_dims = None
+
+            none_args = {i for i, arg in enumerate(args) if arg is None}
+            if any(none_args):
+                if any(input_core_dims[i] != () for i in none_args):
+                    raise ValueError(
+                        f"Cannot pass None at locations {none_args} with {signature=}"
+                    )
+                excluded_func, args, _ = _apply_excluded(
+                    excluded_func, none_args, args, {}
+                )
+                input_core_dims = [
+                    dim for i, dim in enumerate(input_core_dims) if i not in none_args
+                ]
+
+            args = tuple(map(jnp.asarray, args))
+
+            broadcast_shape, dim_sizes = _parse_input_dimensions(
+                args, input_core_dims, error_context
+            )
+
+            checked_func = _check_output_dims(
+                excluded_func, dim_sizes, output_core_dims, error_context
+            )
+
+            # Rather than broadcasting all arguments to full broadcast shapes, prefer
+            # expanding dimensions using vmap. By pushing broadcasting
+            # into vmap, we can make use of more efficient batching rules for
+            # primitives where only some arguments are batched (e.g., for
+            # lax_linalg.triangular_solve), and avoid instantiating large broadcasted
+            # arrays.
+
+            squeezed_args = []
+            rev_filled_shapes = []
+
+            for arg, core_dims in zip(args, input_core_dims):
+                noncore_shape = arg.shape[: arg.ndim - len(core_dims)]
+
+                pad_ndim = len(broadcast_shape) - len(noncore_shape)
+                filled_shape = pad_ndim * (1,) + noncore_shape
+                rev_filled_shapes.append(filled_shape[::-1])
+
+                squeeze_indices = tuple(
+                    i for i, size in enumerate(noncore_shape) if size == 1
+                )
+                squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices)
+                squeezed_args.append(squeezed_arg)
+
+            vectorized_func = checked_func
+            dims_to_expand = []
+            for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)):
+                in_axes = tuple(None if size == 1 else 0 for size in axis_sizes)
+                if all(axis is None for axis in in_axes):
+                    dims_to_expand.append(len(broadcast_shape) - 1 - negdim)
+                else:
+                    # change the vmap here to chunked_vmap
+                    vectorized_func = vmap_chunked(
+                        vectorized_func, in_axes, chunk_size=chunk_size
+                    )
+            result = vectorized_func(*squeezed_args)
+
+            if not dims_to_expand:
+                return result
+            elif isinstance(result, tuple):
+                return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result)
+            else:
+                return jnp.expand_dims(result, axis=dims_to_expand)
+
+        return wrapped
+
 
 # we can't really test the numpy backend stuff in automated testing, so we ignore it
 # for coverage purposes
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 0a98bb0a28..85612b6b72 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -5,7 +5,14 @@
 
 import numpy as np
 
-from desc.backend import jit, jnp, tree_flatten, tree_unflatten, use_jax
+from desc.backend import (
+    batched_vectorize,
+    jit,
+    jnp,
+    tree_flatten,
+    tree_unflatten,
+    use_jax,
+)
 from desc.derivatives import Derivative
 from desc.io import IOAble
 from desc.optimizable import Optimizable
@@ -444,15 +451,17 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         fun = lambda x: getattr(self, op)(x, constants)
         if len(v) == 1:
             jvpfun = lambda dx: Derivative.compute_jvp(fun, 0, dx, x)
-            return jnp.vectorize(jvpfun, signature="(n)->(k)")(v[0])
+            return batched_vectorize(jvpfun, signature="(n)->(k)")(v[0])
         elif len(v) == 2:
             jvpfun = lambda dx1, dx2: Derivative.compute_jvp2(fun, 0, 0, dx1, dx2, x)
-            return jnp.vectorize(jvpfun, signature="(n),(n)->(k)")(v[0], v[1])
+            return batched_vectorize(jvpfun, signature="(n),(n)->(k)")(v[0], v[1])
         elif len(v) == 3:
             jvpfun = lambda dx1, dx2, dx3: Derivative.compute_jvp3(
                 fun, 0, 0, 0, dx1, dx2, dx3, x
             )
-            return jnp.vectorize(jvpfun, signature="(n),(n),(n)->(k)")(v[0], v[1], v[2])
+            return batched_vectorize(jvpfun, signature="(n),(n),(n)->(k)")(
+                v[0], v[1], v[2]
+            )
         else:
             raise NotImplementedError("Cannot compute JVP higher than 3rd order.")
 
@@ -1024,7 +1033,7 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         fun = lambda *x: getattr(self, op)(*x, constants=constants)
         jvpfun = lambda *dx: Derivative.compute_jvp(fun, tuple(range(len(x))), dx, *x)
         sig = ",".join(f"(n{i})" for i in range(len(x))) + "->(k)"
-        return jnp.vectorize(jvpfun, signature=sig)(*v)
+        return batched_vectorize(jvpfun, signature=sig)(*v)
 
     def jvp_scaled(self, v, x, constants=None):
         """Compute Jacobian-vector product of self.compute_scaled.
diff --git a/requirements.txt b/requirements.txt
index a667a2a2db..e29871e88e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,3 +12,4 @@ psutil
 pylatexenc >= 2.0, < 3.0
 scipy >= 1.7.0, < 2.0.0
 termcolor
+netket

From 5aef59c322a6d2f97bd9779acbaf33862dd749ef Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 12 Jun 2024 14:39:53 -0400
Subject: [PATCH 02/46] add chunk_size arg

---
 desc/objectives/_equilibrium.py   | 2 ++
 desc/objectives/objective_funs.py | 4 +++-
 tests/test_examples.py            | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index d859e996b4..6bcb8a6c67 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -83,6 +83,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force",
+        chunk_size=12,
     ):
         if target is None and bounds is None:
             target = 0
@@ -97,6 +98,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 85612b6b72..ccb4415751 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -792,6 +792,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name=None,
+        chunk_size=1,
     ):
         if self._scalar:
             assert self._coordinates == ""
@@ -802,6 +803,7 @@ def __init__(
         assert (bounds is None) or (target is None), "Cannot use both bounds and target"
         assert loss_function in [None, "mean", "min", "max"]
         assert deriv_mode in {"auto", "fwd", "rev"}
+        self.chunk_size = chunk_size
 
         self._target = target
         self._bounds = bounds
@@ -1033,7 +1035,7 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         fun = lambda *x: getattr(self, op)(*x, constants=constants)
         jvpfun = lambda *dx: Derivative.compute_jvp(fun, tuple(range(len(x))), dx, *x)
         sig = ",".join(f"(n{i})" for i in range(len(x))) + "->(k)"
-        return batched_vectorize(jvpfun, signature=sig)(*v)
+        return batched_vectorize(jvpfun, signature=sig, chunk_size=self.chunk_size)(*v)
 
     def jvp_scaled(self, v, x, constants=None):
         """Compute Jacobian-vector product of self.compute_scaled.
diff --git a/tests/test_examples.py b/tests/test_examples.py
index b84cd967b1..530089e107 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -165,7 +165,7 @@ def test_1d_optimization():
     eq = get("SOLOVEV")
     objective = ObjectiveFunction(AspectRatio(eq=eq, target=2.5))
     constraints = (
-        ForceBalance(eq=eq),
+        ForceBalance(eq=eq, chunk_size=50),
         FixBoundaryR(eq=eq),
         FixBoundaryZ(eq=eq, modes=eq.surface.Z_basis.modes[0:-1, :]),
         FixPressure(eq=eq),

From 67844a73b2af41e67b400b2e2a6c58552ce55135 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 12 Jun 2024 15:39:57 -0400
Subject: [PATCH 03/46] add chunk_size to ObjectiveFunction as well

---
 desc/objectives/objective_funs.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index ccb4415751..91559b7725 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -46,7 +46,12 @@ class ObjectiveFunction(IOAble):
     _io_attrs_ = ["_objectives"]
 
     def __init__(
-        self, objectives, use_jit=True, deriv_mode="auto", name="ObjectiveFunction"
+        self,
+        objectives,
+        use_jit=True,
+        deriv_mode="auto",
+        name="ObjectiveFunction",
+        chunk_size=1,
     ):
         if not isinstance(objectives, (tuple, list)):
             objectives = (objectives,)
@@ -55,6 +60,10 @@ def __init__(
         ), "members of ObjectiveFunction should be instances of _Objective"
         assert use_jit in {True, False}
         assert deriv_mode in {"auto", "batched", "looped", "blocked"}
+        if chunk_size is None:
+            self.chunk_size = objectives[0].chunk_size
+        else:
+            self.chunk_size = chunk_size
 
         self._objectives = objectives
         self._use_jit = use_jit
@@ -451,17 +460,21 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         fun = lambda x: getattr(self, op)(x, constants)
         if len(v) == 1:
             jvpfun = lambda dx: Derivative.compute_jvp(fun, 0, dx, x)
-            return batched_vectorize(jvpfun, signature="(n)->(k)")(v[0])
+            return batched_vectorize(
+                jvpfun, signature="(n)->(k)", chunk_size=self.chunk_size
+            )(v[0])
         elif len(v) == 2:
             jvpfun = lambda dx1, dx2: Derivative.compute_jvp2(fun, 0, 0, dx1, dx2, x)
-            return batched_vectorize(jvpfun, signature="(n),(n)->(k)")(v[0], v[1])
+            return batched_vectorize(
+                jvpfun, signature="(n),(n)->(k)", chunk_size=self.chunk_size
+            )(v[0], v[1])
         elif len(v) == 3:
             jvpfun = lambda dx1, dx2, dx3: Derivative.compute_jvp3(
                 fun, 0, 0, 0, dx1, dx2, dx3, x
             )
-            return batched_vectorize(jvpfun, signature="(n),(n),(n)->(k)")(
-                v[0], v[1], v[2]
-            )
+            return batched_vectorize(
+                jvpfun, signature="(n),(n),(n)->(k)", chunk_size=self.chunk_size
+            )(v[0], v[1], v[2])
         else:
             raise NotImplementedError("Cannot compute JVP higher than 3rd order.")
 

From 517a5194e5b67577f573c8ce963060a6b9630c67 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Fri, 14 Jun 2024 17:11:28 -0400
Subject: [PATCH 04/46] set default chunk size to None to disable chunking by
 default

---
 desc/objectives/objective_funs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 91559b7725..98927fd86e 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -51,7 +51,7 @@ def __init__(
         use_jit=True,
         deriv_mode="auto",
         name="ObjectiveFunction",
-        chunk_size=1,
+        chunk_size=None,
     ):
         if not isinstance(objectives, (tuple, list)):
             objectives = (objectives,)
@@ -805,7 +805,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name=None,
-        chunk_size=1,
+        chunk_size=None,
     ):
         if self._scalar:
             assert self._coordinates == ""

From 6b786d06bd3213f696e0e782a872c64cd8a01357 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Fri, 14 Jun 2024 19:02:36 -0400
Subject: [PATCH 05/46] fix default chunk size in backend

---
 desc/backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/desc/backend.py b/desc/backend.py
index c996925f5f..e2a657f3f8 100644
--- a/desc/backend.py
+++ b/desc/backend.py
@@ -379,7 +379,7 @@ def tangent_solve(g, y):
     from netket.jax import vmap_chunked
 
     def batched_vectorize(
-        pyfunc, *, excluded=frozenset(), signature=None, chunk_size=12
+        pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None
     ):
         """Define a vectorized function with broadcasting and batching.
 

From 135adce7282f9a28cc8d76446300366dd08da36f Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 24 Jun 2024 16:31:22 -0400
Subject: [PATCH 06/46] document chunk_size in ObjectiveFunction docstring

---
 desc/objectives/objective_funs.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 98927fd86e..2393e88662 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -40,6 +40,16 @@ class ObjectiveFunction(IOAble):
         otherwise "blocked".
     name : str
         Name of the objective function.
+    chunk_size : int, optional
+        If `"batched"` deriv_mode is used, will calculate the Jacobian
+        ``chunk_size`` columns at a time, instead of all at once. A
+        ``chunk_size`` of 1 is equivalent to using `"looped"` deriv_mode.
+        The memory usage of the Jacobian calculation is linearly proportional to
+        ``chunk_size``:the smaller the ``chunk_size``, the less memory the Jacobian
+        calculation will require (with some baseline memory usage). The time it takes
+        to compute the Jacobian roughly ``t ~1/chunk_size` with some baseline time,
+        so the larger the ``chunk_size``, the faster the calculation takes.
+        If None, it will default to the largest possible size i.e. ``dim_x``
 
     """
 

From 03660438d2b0b9f24960aadb747d9c6ff674f66c Mon Sep 17 00:00:00 2001
From: "Dario G. Panici" <dpanici@princeton.edu>
Date: Tue, 2 Jul 2024 18:17:39 -0400
Subject: [PATCH 07/46] add chunk_size arg to solve continuation

---
 desc/continuation.py       | 34 ++++++++++++++++++++++++++++------
 desc/objectives/getters.py |  8 +++++---
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/desc/continuation.py b/desc/continuation.py
index 8be7e33cc5..635fcb8765 100644
--- a/desc/continuation.py
+++ b/desc/continuation.py
@@ -29,6 +29,7 @@ def _solve_axisym(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
+    chunk_size=None,
 ):
     """Solve initial axisymmetric case with adaptive step sizing."""
     timer = Timer()
@@ -99,7 +100,9 @@ def _solve_axisym(
             surf_i = surf_i2
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
-        objective_i = get_equilibrium_objective(eq=eqi, mode=objective)
+        objective_i = get_equilibrium_objective(
+            eq=eqi, mode=objective, chunk_size=chunk_size
+        )
 
         if verbose:
             _print_iteration_summary(
@@ -196,6 +199,7 @@ def _add_pressure(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
+    chunk_size=None,
 ):
     """Add pressure with adaptive step sizing."""
     timer = Timer()
@@ -224,7 +228,9 @@ def _add_pressure(
         pres_ratio += pres_step
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
-        objective_i = get_equilibrium_objective(eq=eqi, mode=objective)
+        objective_i = get_equilibrium_objective(
+            eq=eqi, mode=objective, chunk_size=chunk_size
+        )
 
         if verbose:
             _print_iteration_summary(
@@ -324,6 +330,7 @@ def _add_shaping(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
+    chunk_size=None,
 ):
     """Add 3D shaping with adaptive step sizing."""
     timer = Timer()
@@ -353,7 +360,9 @@ def _add_shaping(
         bdry_ratio += bdry_step
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
-        objective_i = get_equilibrium_objective(eq=eqi, mode=objective)
+        objective_i = get_equilibrium_objective(
+            eq=eqi, mode=objective, chunk_size=chunk_size
+        )
 
         if verbose:
             _print_iteration_summary(
@@ -451,6 +460,7 @@ def solve_continuation_automatic(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
+    chunk_size=None,
     **kwargs,
 ):
     """Solve for an equilibrium using an automatic continuation method.
@@ -529,6 +539,7 @@ def solve_continuation_automatic(  # noqa: C901
         maxiter,
         verbose,
         checkpoint_path,
+        chunk_size=chunk_size,
     )
 
     # for zero current we want to do shaping before pressure to avoid having a
@@ -547,6 +558,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
+            chunk_size=chunk_size,
         )
 
         eqfam = _add_pressure(
@@ -562,6 +574,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
+            chunk_size=chunk_size,
         )
 
     # for other cases such as fixed iota or nonzero current we do pressure first
@@ -580,6 +593,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
+            chunk_size=chunk_size,
         )
 
         eqfam = _add_shaping(
@@ -595,6 +609,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
+            chunk_size=chunk_size,
         )
     eq.params_dict = eqfam[-1].params_dict
     eqfam[-1] = eq
@@ -625,6 +640,7 @@ def solve_continuation(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
+    chunk_size=None,
 ):
     """Solve for an equilibrium by continuation method.
 
@@ -685,7 +701,9 @@ def solve_continuation(  # noqa: C901
 
     if not isinstance(optimizer, Optimizer):
         optimizer = Optimizer(optimizer)
-    objective_i = get_equilibrium_objective(eq=eqfam[0], mode=objective)
+    objective_i = get_equilibrium_objective(
+        eq=eqfam[0], mode=objective, chunk_size=chunk_size
+    )
     constraints_i = get_fixed_boundary_constraints(eq=eqfam[0])
 
     ii = 0
@@ -732,7 +750,9 @@ def solve_continuation(  # noqa: C901
                 print("Perturbing equilibrium")
             # TODO: pass Jx if available
             eqp = eqfam[ii - 1].copy()
-            objective_i = get_equilibrium_objective(eq=eqp, mode=objective)
+            objective_i = get_equilibrium_objective(
+                eq=eqp, mode=objective, chunk_size=chunk_size
+            )
             constraints_i = get_fixed_boundary_constraints(eq=eqp)
             eqp.change_resolution(**eqi.resolution)
             eqp.perturb(
@@ -752,7 +772,9 @@ def solve_continuation(  # noqa: C901
 
         if not stop:
             # TODO: add ability to rebind objectives
-            objective_i = get_equilibrium_objective(eq=eqi, mode=objective)
+            objective_i = get_equilibrium_objective(
+                eq=eqi, mode=objective, chunk_size=chunk_size
+            )
             constraints_i = get_fixed_boundary_constraints(eq=eqi)
             eqi.solve(
                 optimizer=optimizer,
diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index 9a7c980e32..993d2e02a1 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -41,7 +41,7 @@
 }
 
 
-def get_equilibrium_objective(eq, mode="force", normalize=True):
+def get_equilibrium_objective(eq, mode="force", normalize=True, **kwargs):
     """Get the objective function for a typical force balance equilibrium problem.
 
     Parameters
@@ -61,7 +61,9 @@ def get_equilibrium_objective(eq, mode="force", normalize=True):
         An objective function with default force balance objectives.
 
     """
-    kwargs = {"eq": eq, "normalize": normalize, "normalize_target": normalize}
+    kwargs = {"eq": eq, "normalize": normalize, "normalize_target": normalize}.update(
+        kwargs
+    )
     if mode == "energy":
         objectives = Energy(**kwargs)
     elif mode == "force":
@@ -70,7 +72,7 @@ def get_equilibrium_objective(eq, mode="force", normalize=True):
         objectives = (RadialForceBalance(**kwargs), HelicalForceBalance(**kwargs))
     else:
         raise ValueError("got an unknown equilibrium objective type '{}'".format(mode))
-    return ObjectiveFunction(objectives)
+    return ObjectiveFunction(objectives, chunk_size=kwargs.get("chunk_size", None))
 
 
 def get_fixed_axis_constraints(eq, profiles=True, normalize=True):

From 131ae78929ebbf0972c63160581176323fe187cf Mon Sep 17 00:00:00 2001
From: "Dario G. Panici" <dpanici@princeton.edu>
Date: Tue, 2 Jul 2024 18:29:23 -0400
Subject: [PATCH 08/46] fix kwargs merge in get_equilibrium_objective
 (dict.update returns None)

---
 desc/objectives/getters.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index 993d2e02a1..ba0800e028 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -61,9 +61,10 @@ def get_equilibrium_objective(eq, mode="force", normalize=True, **kwargs):
         An objective function with default force balance objectives.
 
     """
-    kwargs = {"eq": eq, "normalize": normalize, "normalize_target": normalize}.update(
-        kwargs
-    )
+    kwargs = {
+        **{"eq": eq, "normalize": normalize, "normalize_target": normalize},
+        **kwargs,
+    }
     if mode == "energy":
         objectives = Energy(**kwargs)
     elif mode == "force":

From feb006e006987637046aab56a6473b2dd0a2c88b Mon Sep 17 00:00:00 2001
From: "Dario G. Panici" <dpanici@princeton.edu>
Date: Thu, 22 Aug 2024 13:11:06 -0400
Subject: [PATCH 09/46] remove netket dependence, vendor only needed parts from
 their vmap_chunked implementation

---
 desc/backend.py                   | 139 --------
 desc/objectives/objective_funs.py |  11 +-
 desc/utils.py                     | 508 +++++++++++++++++++++++++++++-
 requirements.txt                  |   1 -
 4 files changed, 509 insertions(+), 150 deletions(-)

diff --git a/desc/backend.py b/desc/backend.py
index 31f7e26054..c26213b045 100644
--- a/desc/backend.py
+++ b/desc/backend.py
@@ -394,145 +394,6 @@ def tangent_solve(g, y):
         )
         return x, (jnp.linalg.norm(res), niter)
 
-    from jax._src.numpy.vectorize import (
-        _apply_excluded,
-        _check_output_dims,
-        _parse_gufunc_signature,
-        _parse_input_dimensions,
-    )
-    from netket.jax import vmap_chunked
-
-    def batched_vectorize(
-        pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None
-    ):
-        """Define a vectorized function with broadcasting and batching.
-
-        below is taken from JAX
-        FIXME: change restof docstring
-        :func:`vectorize` is a convenience wrapper for defining vectorized
-        functions with broadcasting, in the style of NumPy's
-        `generalized universal functions
-        <https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html>`_.
-        It allows for defining functions that are automatically repeated across
-        any leading dimensions, without the implementation of the function needing to
-        be concerned about how to handle higher dimensional inputs.
-
-        :func:`jax.numpy.vectorize` has the same interface as
-        :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching
-        transformation (:func:`vmap`) rather than a Python loop. This should be
-        considerably more efficient, but the implementation must be written in terms
-        of functions that act on JAX arrays.
-
-        Args
-        ----
-            pyfunc: function to vectorize.
-            excluded: optional set of integers representing positional arguments for
-            which the function will not be vectorized. These will be passed directly
-            to ``pyfunc`` unmodified.
-            signature: optional generalized universal function signature, e.g.,
-            ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
-            provided, ``pyfunc`` will be called with (and expected to return) arrays
-            with shapes given by the size of corresponding core dimensions. By
-            default, pyfunc is assumed to take scalars arrays as input and output.
-            chunk_size: the size of the batches to pass to vmap. if 1, will only
-
-        Returns
-        -------
-            Vectorized version of the given function.
-
-        """
-        if any(not isinstance(exclude, (str, int)) for exclude in excluded):
-            raise TypeError(
-                "jax.numpy.vectorize can only exclude integer or string arguments, "
-                "but excluded={!r}".format(excluded)
-            )
-        if any(isinstance(e, int) and e < 0 for e in excluded):
-            raise ValueError(f"excluded={excluded!r} contains negative numbers")
-
-        @functools.wraps(pyfunc)
-        def wrapped(*args, **kwargs):
-            error_context = (
-                "on vectorized function with excluded={!r} and "
-                "signature={!r}".format(excluded, signature)
-            )
-            excluded_func, args, kwargs = _apply_excluded(
-                pyfunc, excluded, args, kwargs
-            )
-
-            if signature is not None:
-                input_core_dims, output_core_dims = _parse_gufunc_signature(signature)
-            else:
-                input_core_dims = [()] * len(args)
-                output_core_dims = None
-
-            none_args = {i for i, arg in enumerate(args) if arg is None}
-            if any(none_args):
-                if any(input_core_dims[i] != () for i in none_args):
-                    raise ValueError(
-                        f"Cannot pass None at locations {none_args} with {signature=}"
-                    )
-                excluded_func, args, _ = _apply_excluded(
-                    excluded_func, none_args, args, {}
-                )
-                input_core_dims = [
-                    dim for i, dim in enumerate(input_core_dims) if i not in none_args
-                ]
-
-            args = tuple(map(jnp.asarray, args))
-
-            broadcast_shape, dim_sizes = _parse_input_dimensions(
-                args, input_core_dims, error_context
-            )
-
-            checked_func = _check_output_dims(
-                excluded_func, dim_sizes, output_core_dims, error_context
-            )
-
-            # Rather than broadcasting all arguments to full broadcast shapes, prefer
-            # expanding dimensions using vmap. By pushing broadcasting
-            # into vmap, we can make use of more efficient batching rules for
-            # primitives where only some arguments are batched (e.g., for
-            # lax_linalg.triangular_solve), and avoid instantiating large broadcasted
-            # arrays.
-
-            squeezed_args = []
-            rev_filled_shapes = []
-
-            for arg, core_dims in zip(args, input_core_dims):
-                noncore_shape = arg.shape[: arg.ndim - len(core_dims)]
-
-                pad_ndim = len(broadcast_shape) - len(noncore_shape)
-                filled_shape = pad_ndim * (1,) + noncore_shape
-                rev_filled_shapes.append(filled_shape[::-1])
-
-                squeeze_indices = tuple(
-                    i for i, size in enumerate(noncore_shape) if size == 1
-                )
-                squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices)
-                squeezed_args.append(squeezed_arg)
-
-            vectorized_func = checked_func
-            dims_to_expand = []
-            for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)):
-                in_axes = tuple(None if size == 1 else 0 for size in axis_sizes)
-                if all(axis is None for axis in in_axes):
-                    dims_to_expand.append(len(broadcast_shape) - 1 - negdim)
-                else:
-                    # change the vmap here to chunked_vmap
-                    vectorized_func = vmap_chunked(
-                        vectorized_func, in_axes, chunk_size=chunk_size
-                    )
-            result = vectorized_func(*squeezed_args)
-
-            if not dims_to_expand:
-                return result
-            elif isinstance(result, tuple):
-                return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result)
-            else:
-                return jnp.expand_dims(result, axis=dims_to_expand)
-
-        return wrapped
-
 
 # we can't really test the numpy backend stuff in automated testing, so we ignore it
 # for coverage purposes
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 34593d5398..35c5cbae08 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -5,20 +5,13 @@
 
 import numpy as np
 
-from desc.backend import (
-    batched_vectorize,
-    execute_on_cpu,
-    jit,
-    jnp,
-    tree_flatten,
-    tree_unflatten,
-    use_jax,
-)
+from desc.backend import execute_on_cpu, jit, jnp, tree_flatten, tree_unflatten, use_jax
 from desc.derivatives import Derivative
 from desc.io import IOAble
 from desc.optimizable import Optimizable
 from desc.utils import (
     Timer,
+    batched_vectorize,
     errorif,
     flatten_list,
     is_broadcastable,
diff --git a/desc/utils.py b/desc/utils.py
index a1df551229..92dd67df7b 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -3,12 +3,25 @@
 import operator
 import warnings
 from itertools import combinations_with_replacement, permutations
+from typing import Callable, Optional
 
 import numpy as np
 from scipy.special import factorial
 from termcolor import colored
 
-from desc.backend import fori_loop, jit, jnp
+from desc.backend import fori_loop, functools, jax, jit, jnp
+
+if jax.__version_info__ >= (0, 4, 16):
+    from jax.extend import linear_util as lu
+else:
+    from jax import linear_util as lu
+
+from jax._src.numpy.vectorize import (
+    _apply_excluded,
+    _check_output_dims,
+    _parse_gufunc_signature,
+    _parse_input_dimensions,
+)
 
 
 class Timer:
@@ -682,3 +695,496 @@ def broadcast_tree(tree_in, tree_out, dtype=int):
     # invalid tree structure
     else:
         raise ValueError("trees must be nested lists of dicts")
+
+
+# credit to _chunk_utils.py of netket package for below section
+def _treeify(f):
+    def _f(x, *args, **kwargs):
+        return jax.tree_util.tree_map(lambda y: f(y, *args, **kwargs), x)
+
+    return _f
+
+
+@_treeify
+def _unchunk(x):
+    return x.reshape((-1,) + x.shape[2:])
+
+
+@_treeify
+def _chunk(x, chunk_size=None):
+    # chunk_size=None -> add just a dummy chunk dimension, same as np.expand_dims(x, 0)
+    if x.ndim == 0:
+        raise ValueError("x cannot be chunked as it has 0 dimensions.")
+    n = x.shape[0]
+    if chunk_size is None:
+        chunk_size = n
+
+    n_chunks, residual = divmod(n, chunk_size)
+    if residual != 0:
+        raise ValueError(
+            "The first dimension of x must be divisible by chunk_size."
+            + f"\n            Got x.shape={x.shape} but chunk_size={chunk_size}."
+        )
+    return x.reshape((n_chunks, chunk_size) + x.shape[1:])
+
+
+def _chunk_size(x):
+    b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
+    if len(b) != 1:
+        raise ValueError(
+            "The arrays in x have inconsistent chunk_size or number of chunks"
+        )
+    return b.pop()[1]
+
+
+def unchunk(x_chunked):
+    """Merge the first two axes of an array (or a pytree of arrays).
+
+    Parameters
+    ----------
+    x_chunked: an array (or pytree of arrays) of at least 2 dimensions
+
+    Returns
+    -------
+    (x, chunk_fn) : tuple
+        where x is x_chunked reshaped to (-1,)+x_chunked.shape[2:]
+        and chunk_fn is a function which restores x_chunked given x
+
+    """
+    return _unchunk(x_chunked), functools.partial(
+        _chunk, chunk_size=_chunk_size(x_chunked)
+    )
+
+
+def chunk(x, chunk_size=None):
+    """Split an array (or a pytree of arrays) into chunks along the first axis.
+
+    Parameters
+    ----------
+        x: an array (or pytree of arrays)
+        chunk_size: an integer or None (default)
+            The first axis in x must be a multiple of chunk_size
+
+    Returns
+    -------
+    (x_chunked, unchunk_fn): tuple
+        - x_chunked is x reshaped to (-1, chunk_size)+x.shape[1:]
+          if chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
+        - unchunk_fn is a function which restores x given x_chunked
+    """
+    return _chunk(x, chunk_size), _unchunk
+
+
+####
+
+# credit to _scanmap.py from netket package for below
+
+_tree_add = functools.partial(jax.tree_util.tree_map, jax.lax.add)
+_tree_zeros_like = functools.partial(
+    jax.tree_util.tree_map, lambda x: jnp.zeros(x.shape, dtype=x.dtype)
+)
+
+
+# TODO put it somewhere
+def _multimap(f, *args):
+    try:
+        return tuple(map(lambda a: f(*a), zip(*args)))
+    except TypeError:
+        return f(*args)
+
+
+def scan_append_reduce(f, x, append_cond, op=_tree_add, zero_fun=_tree_zeros_like):
+    """Evaluate f element by element in x while appending and/or reducing the results.
+
+    Parameters
+    ----------
+        f: a function that takes elements of the leading dimension of x
+        x: a pytree where each leaf array has the same leading dimension
+        append_cond: a bool (if f returns just one result) or a tuple of
+                     bools (if f returns multiple values)
+                     which indicates whether the individual result should
+                     be appended or reduced
+        op: a function to (pairwise) reduce the specified results. Defaults to a sum.
+        zero_fun: a function which prepares the zero element of op for a given input
+                  shape/dtype tree. Defaults to zeros.
+
+    Returns
+    -------
+        The (tuple of) results corresponding to the output of f
+        where each result is given by:
+        if append_cond is True:
+            a (pytree of) array(s) with leading dimension same as x,
+            containing the evaluation of f at each element in x
+        else (append_cond is False):
+            a (pytree of) array(s) with the same shape as the corresponding
+            output of f, containing the reduction over op of f evaluated at each x
+
+
+    Example:
+
+        import jax.numpy as jnp
+        from desc.utils import scan_append_reduce
+
+        def f(x):
+             y = jnp.sin(x)
+             return y, y, y**2
+
+        N = 100
+        x = jnp.linspace(0.,jnp.pi,N)
+
+        y, s, s2 = scan_append_reduce(f, x, (True, False, False))
+        mean = s/N
+        var = s2/N - mean**2
+    """
+    # TODO: different op for each result
+
+    x0 = jax.tree_util.tree_map(lambda x: x[0], x)
+
+    # special code path if there is only one element
+    # to avoid having to rely on xla/llvm to optimize the overhead away
+    if jax.tree_util.tree_leaves(x)[0].shape[0] == 1:
+        return _multimap(
+            lambda c, x: jnp.expand_dims(x, 0) if c else x, append_cond, f(x0)
+        )
+
+    # the original idea was to use pytrees,
+    # however for now just operate on the return value tuple
+    _get_append_part = functools.partial(
+        _multimap, lambda c, x: x if c else None, append_cond
+    )
+    _get_op_part = functools.partial(
+        _multimap, lambda c, x: x if not c else None, append_cond
+    )
+    _tree_select = functools.partial(
+        _multimap, lambda c, t1, t2: t1 if c else t2, append_cond
+    )
+
+    carry_init = True, _get_op_part(zero_fun(jax.eval_shape(f, x0)))
+
+    def f_(carry, x):
+        is_first, y_carry = carry
+        y = f(x)
+        y_op = _get_op_part(y)
+        y_append = _get_append_part(y)
+        y_reduce = op(y_carry, y_op)
+        return (False, y_reduce), y_append
+
+    (_, res_op), res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
+    # reconstruct the result from the reduced and appended parts in the two trees
+    return _tree_select(res_append, res_op)
+
+
+scan_append = functools.partial(scan_append_reduce, append_cond=True)
+scan_reduce = functools.partial(scan_append_reduce, append_cond=False)
+
+
+# TODO in_axes a la vmap?
+def _scanmap(fun, scan_fun, argnums=0):
+    """A helper function to wrap f with a scan_fun.
+
+    Example
+    -------
+        import jax.numpy as jnp
+        from functools import partial
+
+        from desc.utils import _scanmap, scan_append_reduce
+
+        scan_fun = partial(scan_append_reduce, append_cond=(True, False, False))
+
+        @partial(_scanmap, scan_fun=scan_fun, argnums=1)
+        def f(c, x):
+             y = jnp.sin(x) + c
+             return y, y, y**2
+
+        N = 100
+        x = jnp.linspace(0.,jnp.pi,N)
+        c = 1.
+
+
+        y, s, s2 = f(c, x)
+        mean = s/N
+        var = s2/N - mean**2
+    """
+
+    def f_(*args, **kwargs):
+        f = lu.wrap_init(fun, kwargs)
+        f_partial, dyn_args = jax.api_util.argnums_partial(
+            f, argnums, args, require_static_args_hashable=False
+        )
+        return scan_fun(lambda x: f_partial.call_wrapped(*x), dyn_args)
+
+    return f_
+
+
+# credit to _vmap_chunked.py from netket package
+# taking the parts not meant to support sharding, as we do not
+# need that in DESC
+
+
+def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs):
+    n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0]
+    n_chunks, n_rest = divmod(n_elements, chunk_size)
+
+    if n_chunks == 0 or chunk_size >= n_elements:
+        y = vmapped_fun(*args, **kwargs)
+    else:
+        # split inputs
+        def _get_chunks(x):
+            x_chunks = jax.tree_util.tree_map(
+                lambda x_: x_[: n_elements - n_rest, ...], x
+            )
+            x_chunks = _chunk(x_chunks, chunk_size)
+            return x_chunks
+
+        def _get_rest(x):
+            x_rest = jax.tree_util.tree_map(
+                lambda x_: x_[n_elements - n_rest :, ...], x
+            )
+            return x_rest
+
+        args_chunks = [
+            _get_chunks(a) if i in argnums else a for i, a in enumerate(args)
+        ]
+        args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)]
+
+        y_chunks = _unchunk(
+            _scanmap(vmapped_fun, scan_append, argnums)(*args_chunks, **kwargs)
+        )
+
+        if n_rest == 0:
+            y = y_chunks
+        else:
+            y_rest = vmapped_fun(*args_rest, **kwargs)
+            y = jax.tree_util.tree_map(
+                lambda y1, y2: jnp.concatenate((y1, y2)), y_chunks, y_rest
+            )
+    return y
+
+
+def _chunk_vmapped_function(
+    vmapped_fun: Callable,
+    chunk_size: Optional[int],
+    argnums=0,
+) -> Callable:
+    """Takes a vmapped function and computes it in chunks."""
+    if chunk_size is None:
+        return vmapped_fun
+
+    if isinstance(argnums, int):
+        argnums = (argnums,)
+    return functools.partial(_eval_fun_in_chunks, vmapped_fun, chunk_size, argnums)
+
+
+def _parse_in_axes(in_axes):
+    if isinstance(in_axes, int):
+        in_axes = (in_axes,)
+
+    if not set(in_axes).issubset((0, None)):
+        raise NotImplementedError("Only in_axes 0/None are currently supported")
+
+    argnums = tuple(
+        map(lambda ix: ix[0], filter(lambda ix: ix[1] is not None, enumerate(in_axes)))
+    )
+    return in_axes, argnums
+
+
+def apply_chunked(
+    f: Callable,
+    in_axes=0,
+    *,
+    chunk_size: Optional[int],
+) -> Callable:
+    """Compute f in smaller chunks over axis 0.
+
+    Takes an implicitly vmapped function over the axis 0 and uses scan to
+    do the computations in smaller chunks over the 0-th axis of all input arguments.
+
+    For this to work, the function `f` should be `vectorized` along the `in_axes`
+    of the arguments. This means that the function `f` should respect the following
+    condition:
+
+    .. code-block:: python
+
+        assert f(x) == jnp.concatenate([f(x_i) for x_i in x], axis=0)
+
+    which is automatically satisfied if `f` is obtained by vmapping a function,
+    such as:
+
+    .. code-block:: python
+
+        f = jax.vmap(f_orig)
+
+
+    Parameters
+    ----------
+        f: A function that satisfies the condition above
+        in_axes: The axes that should be scanned along. Only supports `0` or `None`
+        chunk_size: The maximum size of the chunks to be used. If it is `None`,
+           chunking is disabled
+
+    """
+    _, argnums = _parse_in_axes(in_axes)
+    return _chunk_vmapped_function(
+        f,
+        chunk_size,
+        argnums,
+    )
+
+
+def vmap_chunked(
+    f: Callable,
+    in_axes=0,
+    *,
+    chunk_size: Optional[int],
+) -> Callable:
+    """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
+
+    This function is essentially equivalent to:
+
+    .. code-block:: python
+
+        apply_chunked(jax.vmap(f, in_axes), in_axes, chunk_size=chunk_size)
+
+    Some limitations to `in_axes` apply.
+
+    Parameters
+    ----------
+        f: The function to be vectorised.
+        in_axes: The axes that should be scanned along. Only supports `0` or `None`
+        chunk_size: The maximum size of the chunks to be used. If it is `None`,
+            chunking is disabled
+
+
+    Returns
+    -------
+        f: A vectorised and chunked function
+    """
+    in_axes, argnums = _parse_in_axes(in_axes)
+    vmapped_fun = jax.vmap(f, in_axes=in_axes)
+    return _chunk_vmapped_function(vmapped_fun, chunk_size, argnums)
+
+
+def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None):
+    """Define a vectorized function with broadcasting and batching.
+
+    The text below is taken from the JAX docstring of ``jax.numpy.vectorize``.
+    FIXME: change rest of docstring
+    :func:`vectorize` is a convenience wrapper for defining vectorized
+    functions with broadcasting, in the style of NumPy's
+    `generalized universal functions
+    <https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html>`_.
+    It allows for defining functions that are automatically repeated across
+    any leading dimensions, without the implementation of the function needing to
+    be concerned about how to handle higher dimensional inputs.
+
+    :func:`jax.numpy.vectorize` has the same interface as
+    :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching
+    transformation (:func:`vmap`) rather than a Python loop. This should be
+    considerably more efficient, but the implementation must be written in terms
+    of functions that act on JAX arrays.
+
+    Parameters
+    ----------
+        pyfunc: function to vectorize.
+        excluded: optional set of integers representing positional arguments for
+        which the function will not be vectorized. These will be passed directly
+        to ``pyfunc`` unmodified.
+        signature: optional generalized universal function signature, e.g.,
+        ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
+        provided, ``pyfunc`` will be called with (and expected to return) arrays
+        with shapes given by the size of corresponding core dimensions. By
+        default, pyfunc is assumed to take scalar arrays as input and output.
+        chunk_size: the size of the batches to pass to vmap. If None, no chunking.
+
+    Returns
+    -------
+        Vectorized version of the given function.
+
+    """
+    if any(not isinstance(exclude, (str, int)) for exclude in excluded):
+        raise TypeError(
+            "jax.numpy.vectorize can only exclude integer or string arguments, "
+            "but excluded={!r}".format(excluded)
+        )
+    if any(isinstance(e, int) and e < 0 for e in excluded):
+        raise ValueError(f"excluded={excluded!r} contains negative numbers")
+
+    @functools.wraps(pyfunc)
+    def wrapped(*args, **kwargs):
+        error_context = (
+            "on vectorized function with excluded={!r} and "
+            "signature={!r}".format(excluded, signature)
+        )
+        excluded_func, args, kwargs = _apply_excluded(pyfunc, excluded, args, kwargs)
+
+        if signature is not None:
+            input_core_dims, output_core_dims = _parse_gufunc_signature(signature)
+        else:
+            input_core_dims = [()] * len(args)
+            output_core_dims = None
+
+        none_args = {i for i, arg in enumerate(args) if arg is None}
+        if any(none_args):
+            if any(input_core_dims[i] != () for i in none_args):
+                raise ValueError(
+                    f"Cannot pass None at locations {none_args} with {signature=}"
+                )
+            excluded_func, args, _ = _apply_excluded(excluded_func, none_args, args, {})
+            input_core_dims = [
+                dim for i, dim in enumerate(input_core_dims) if i not in none_args
+            ]
+
+        args = tuple(map(jnp.asarray, args))
+
+        broadcast_shape, dim_sizes = _parse_input_dimensions(
+            args, input_core_dims, error_context
+        )
+
+        checked_func = _check_output_dims(
+            excluded_func, dim_sizes, output_core_dims, error_context
+        )
+
+        # Rather than broadcasting all arguments to full broadcast shapes, prefer
+        # expanding dimensions using vmap. By pushing broadcasting
+        # into vmap, we can make use of more efficient batching rules for
+        # primitives where only some arguments are batched (e.g., for
+        # lax_linalg.triangular_solve), and avoid instantiating large broadcasted
+        # arrays.
+
+        squeezed_args = []
+        rev_filled_shapes = []
+
+        for arg, core_dims in zip(args, input_core_dims):
+            noncore_shape = arg.shape[: arg.ndim - len(core_dims)]
+
+            pad_ndim = len(broadcast_shape) - len(noncore_shape)
+            filled_shape = pad_ndim * (1,) + noncore_shape
+            rev_filled_shapes.append(filled_shape[::-1])
+
+            squeeze_indices = tuple(
+                i for i, size in enumerate(noncore_shape) if size == 1
+            )
+            squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices)
+            squeezed_args.append(squeezed_arg)
+
+        vectorized_func = checked_func
+        dims_to_expand = []
+        for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)):
+            in_axes = tuple(None if size == 1 else 0 for size in axis_sizes)
+            if all(axis is None for axis in in_axes):
+                dims_to_expand.append(len(broadcast_shape) - 1 - negdim)
+            else:
+                # use vmap_chunked instead of jax.vmap so the map runs in chunks
+                vectorized_func = vmap_chunked(
+                    vectorized_func, in_axes, chunk_size=chunk_size
+                )
+        result = vectorized_func(*squeezed_args)
+
+        if not dims_to_expand:
+            return result
+        elif isinstance(result, tuple):
+            return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result)
+        else:
+            return jnp.expand_dims(result, axis=dims_to_expand)
+
+    return wrapped
diff --git a/requirements.txt b/requirements.txt
index e29871e88e..a667a2a2db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,3 @@ psutil
 pylatexenc >= 2.0, < 3.0
 scipy >= 1.7.0, < 2.0.0
 termcolor
-netket

From eda29ee15185780c42380b719507684685e748d8 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 22 Aug 2024 16:03:43 -0400
Subject: [PATCH 10/46] add chunk_size arg to every Obective

---
 desc/objectives/_bootstrap.py     |  15 ++++
 desc/objectives/_coils.py         | 134 ++++++++++++++++++++++++++++++
 desc/objectives/_equilibrium.py   |  89 +++++++++++++++++++-
 desc/objectives/_free_boundary.py |  51 +++++++++++-
 desc/objectives/_generic.py       |  32 +++++++
 desc/objectives/_geometry.py      | 120 ++++++++++++++++++++++++++
 desc/objectives/_omnigenity.py    |  77 ++++++++++++++++-
 desc/objectives/_profiles.py      |  60 +++++++++++++
 desc/objectives/_stability.py     |  30 +++++++
 desc/objectives/objective_funs.py |  22 ++---
 tests/test_examples.py            |   2 +-
 11 files changed, 615 insertions(+), 17 deletions(-)

diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py
index 365a9882c2..3807edb87f 100644
--- a/desc/objectives/_bootstrap.py
+++ b/desc/objectives/_bootstrap.py
@@ -66,6 +66,19 @@ class BootstrapRedlConsistency(_Objective):
         or quasi-axisymmetry; set to +/-NFP for quasi-helical symmetry.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
     """
 
     _coordinates = "r"
@@ -85,6 +98,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="Bootstrap current self-consistency (Redl)",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -102,6 +116,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py
index 62ef664622..ee947a1952 100644
--- a/desc/objectives/_coils.py
+++ b/desc/objectives/_coils.py
@@ -60,6 +60,19 @@ class _CoilObjective(_Objective):
         If a list, must have the same structure as coil.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -76,6 +89,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name=None,
+        chunk_size=None,
     ):
         self._grid = grid
         self._data_keys = data_keys
@@ -90,6 +104,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):  # noqa:C901
@@ -249,6 +264,19 @@ class CoilLength(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -268,6 +296,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil length",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2 * np.pi
@@ -284,6 +313,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -374,6 +404,19 @@ class CoilCurvature(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -393,6 +436,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil curvature",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, 1)
@@ -409,6 +453,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -494,6 +539,19 @@ class CoilTorsion(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -513,6 +571,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil torsion",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -529,6 +588,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -614,6 +674,19 @@ class CoilCurrentLength(CoilLength):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -633,6 +706,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil current length",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -648,6 +722,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -742,6 +817,19 @@ class CoilSetMinDistance(_Objective):
         If a list, must have the same structure as coils.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -761,6 +849,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil-coil minimum distance",
+        chunk_size=None,
     ):
         from desc.coils import CoilSet
 
@@ -782,6 +871,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -916,6 +1006,19 @@ class PlasmaCoilSetMinDistance(_Objective):
         False by default, so that self.things = [coil, eq].
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -939,6 +1042,7 @@ def __init__(
         eq_fixed=False,
         coils_fixed=False,
         name="plasma-coil minimum distance",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -964,6 +1068,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1146,6 +1251,18 @@ class QuadraticFlux(_Objective):
         plasma currents) is set to zero.
     name : str
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
 
     """
 
@@ -1169,6 +1286,7 @@ def __init__(
         field_grid=None,
         vacuum=False,
         name="Quadratic flux",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1187,6 +1305,7 @@ def __init__(
             normalize=normalize,
             normalize_target=normalize_target,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1348,6 +1467,19 @@ class ToroidalFlux(_Objective):
         zeta=jnp.array(0.0), NFP=eq.NFP).
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -1369,6 +1501,7 @@ def __init__(
         field_grid=None,
         eval_grid=None,
         name="toroidal-flux",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = eq.Psi
@@ -1387,6 +1520,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index 6bcb8a6c67..8ab17f029e 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -63,6 +63,18 @@ class ForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
 
     """
 
@@ -83,7 +95,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force",
-        chunk_size=12,
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -237,6 +249,19 @@ class ForceBalanceAnisotropic(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -257,6 +282,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force-anisotropic",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -271,6 +297,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -395,6 +422,19 @@ class RadialForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -415,6 +455,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="radial force",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -429,6 +470,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -553,6 +595,19 @@ class HelicalForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -573,6 +628,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="helical force",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -587,6 +643,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -709,6 +766,19 @@ class Energy(_Objective):
         Adiabatic (compressional) index. Default = 0.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -732,6 +802,7 @@ def __init__(
         grid=None,
         gamma=0,
         name="energy",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -747,6 +818,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -876,6 +948,19 @@ class CurrentDensity(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -896,6 +981,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="current density",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -910,6 +996,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py
index 3093f40f1e..ccfe81a957 100644
--- a/desc/objectives/_free_boundary.py
+++ b/desc/objectives/_free_boundary.py
@@ -75,8 +75,21 @@ class VacuumBoundaryError(_Objective):
     field_fixed : bool
         Whether to assume the field is fixed. For free boundary solve, should
         be fixed. For single stage optimization, should be False (default).
-    name : str
+    name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -101,6 +114,7 @@ def __init__(
         field_grid=None,
         field_fixed=False,
         name="Vacuum boundary error",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -123,6 +137,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -390,8 +405,21 @@ class BoundaryError(_Objective):
     loop : bool
         If True, evaluate integral using loops, as opposed to vmap. Slower, but uses
         less memory.
-    name : str
+    name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
 
     Examples
@@ -437,6 +465,7 @@ def __init__(
         field_fixed=False,
         loop=True,
         name="Boundary error",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -463,6 +492,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -825,8 +855,21 @@ class BoundaryErrorNESTOR(_Objective):
         "auto" selects forward or reverse mode based on the size of the input and output
         of the objective. Has no effect on self.grad or self.hess which always use
         reverse mode and forward over reverse mode respectively.
-    name : str
+    name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -853,6 +896,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name="NESTOR Boundary",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -872,6 +916,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py
index 84c7b10117..d0be8f2f48 100644
--- a/desc/objectives/_generic.py
+++ b/desc/objectives/_generic.py
@@ -57,6 +57,19 @@ class GenericObjective(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -75,6 +88,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="generic",
+        chunk_size=None,
         **kwargs,
     ):
         errorif(
@@ -97,6 +111,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
         self._p = _parse_parameterization(thing)
         self._scalar = not bool(data_index[self._p][self.f]["dim"])
@@ -225,6 +240,7 @@ def __init__(
         normalize=False,
         normalize_target=False,
         name="custom linear",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -237,6 +253,7 @@ def __init__(
             normalize=normalize,
             normalize_target=normalize_target,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=False, verbose=1):
@@ -342,6 +359,19 @@ class ObjectiveFromUser(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     Examples
     --------
@@ -378,6 +408,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="custom",
+        chunk_size=None,
         **kwargs,
     ):
         errorif(
@@ -400,6 +431,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
         self._p = _parse_parameterization(thing)
 
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index f72e9f7605..c646985e2a 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -54,6 +54,19 @@ class AspectRatio(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -73,6 +86,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="aspect ratio",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2
@@ -87,6 +101,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -216,6 +231,19 @@ class Elongation(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -235,6 +263,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="elongation",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -249,6 +278,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -377,6 +407,19 @@ class Volume(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -396,6 +439,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="volume",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -410,6 +454,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -587,6 +632,19 @@ class PlasmaVesselDistance(_Objective):
         more accurate approximation of the true min.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
     """
 
     _coordinates = "rtz"
@@ -612,6 +670,7 @@ def __init__(
         softmin_alpha=1.0,
         name="plasma-vessel distance",
         use_signed_distance=False,
+        chunk_size=None,
         **kwargs,
     ):
         if target is None and bounds is None:
@@ -651,6 +710,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -925,6 +985,19 @@ class MeanCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -944,6 +1017,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="mean curvature",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -958,6 +1032,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1085,6 +1160,19 @@ class PrincipalCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -1104,6 +1192,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="principal-curvature",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -1118,6 +1207,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1240,6 +1330,19 @@ class BScaleLength(_Objective):
         ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation runs, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive, but slowest, method of
+        calculating the Jacobian.
+        If None, defaults to the largest possible ``chunk_size``, i.e. ``dim_x``.
+
 
     """
 
@@ -1259,6 +1362,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="B-scale-length",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1273,6 +1377,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1391,6 +1496,19 @@ class GoodCoordinates(_Objective):
         Collocation grid containing the nodes to evaluate at.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -1411,6 +1529,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coordinate goodness",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1426,6 +1545,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py
index 8653d4a770..1ffcbd9af7 100644
--- a/desc/objectives/_omnigenity.py
+++ b/desc/objectives/_omnigenity.py
@@ -57,6 +57,19 @@ class QuasisymmetryBoozer(_Objective):
         Toroidal resolution of Boozer transformation. Default = 2 * eq.N.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -78,6 +91,7 @@ def __init__(
         M_booz=None,
         N_booz=None,
         name="QS Boozer",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -95,6 +109,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
         self._print_value_fmt = (
@@ -279,6 +294,19 @@ class QuasisymmetryTwoTerm(_Objective):
         Type of quasi-symmetry (M, N).
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -299,6 +327,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="QS two-term",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -314,6 +343,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
         self._print_value_fmt = (
@@ -471,6 +501,19 @@ class QuasisymmetryTripleProduct(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -490,6 +533,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="QS triple product",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -504,6 +548,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -647,8 +692,21 @@ class Omnigenity(_Objective):
         computation time during optimization and self.things = [eq] only.
         If False, the field is allowed to change during the optimization and its
         associated data are re-computed at every iteration (Default).
-    name : str
+    name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -675,6 +733,7 @@ def __init__(
         eq_fixed=False,
         field_fixed=False,
         name="omnigenity",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -706,6 +765,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -984,6 +1044,19 @@ class Isodynamicity(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -1003,6 +1076,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Isodynamicity",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1017,6 +1091,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py
index 34dc9ee29d..3d714a8a00 100644
--- a/desc/objectives/_profiles.py
+++ b/desc/objectives/_profiles.py
@@ -53,6 +53,19 @@ class Pressure(_Objective):
         Defaults to ``LinearGrid(L=eq.L_grid)``.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -72,6 +85,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="pressure",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -86,6 +100,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -211,6 +226,19 @@ class RotationalTransform(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -230,6 +258,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="rotational transform",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -244,6 +273,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -382,6 +412,19 @@ class Shear(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -401,6 +444,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="shear",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -415,6 +459,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -549,6 +594,19 @@ class ToroidalCurrent(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -568,6 +626,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="toroidal current",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -582,6 +641,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index 0806375794..28c3a07871 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -63,6 +63,19 @@ class MercierStability(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -82,6 +95,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Mercier Stability",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -96,6 +110,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -244,6 +259,19 @@ class MagneticWell(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
+    chunk_size : int, optional
+        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+        instead of all at once.  The memory usage of the Jacobian calculation is
+        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+        less memory the Jacobian calculation will require (with some baseline memory
+        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+        with some baseline time, so the larger the ``chunk_size``, the faster the
+        calculation takes, at the cost of requiring more memory. A ``chunk_size``
+        of 1 corresponds to the least memory intensive,  but slowest method of
+        calculating the Jacobian.
+        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+
 
     """
 
@@ -263,6 +291,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Magnetic Well",
+        chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -277,6 +306,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            chunk_size=chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 35c5cbae08..33ceda4b14 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -33,10 +33,11 @@ class ObjectiveFunction(IOAble):
         Method for computing Jacobian matrices. "batched" uses forward mode, applied to
         the entire objective at once, and is generally the fastest for vector valued
         objectives, though most memory intensive. "blocked" builds the Jacobian for each
-        objective separately, using each objective's preferred AD mode. Generally the
-        most efficient option when mixing scalar and vector valued objectives.
-        "looped" uses forward mode jacobian vector products in a loop to build the
-        Jacobian column by column. Generally the slowest, but most memory efficient.
+        objective separately, using each objective's preferred AD mode (and
+        each objective's `chunk_size`). Generally the most efficient option when mixing
+        scalar and vector valued objectives. "looped" uses forward mode jacobian vector
+        products in a loop to build the Jacobian column by column. Generally the
+        slowest, but most memory efficient.
         "auto" defaults to "batched" if all sub-objectives are set to "fwd",
         otherwise "blocked".
     name : str
@@ -46,10 +47,11 @@ class ObjectiveFunction(IOAble):
         ``chunk_size`` columns at a time, instead of all at once. A
         ``chunk_size`` of 1 is equivalent to using `"looped"` deriv_mode.
         The memory usage of the Jacobian calculation is linearly proportional to
-        ``chunk_size``:the smaller the ``chunk_size``, the less memory the Jacobian
+        ``chunk_size``: the smaller the ``chunk_size``, the less memory the Jacobian
         calculation will require (with some baseline memory usage). The time it takes
-        to compute the Jacobian roughly ``t ~1/chunk_size` with some baseline time,
-        so the larger the ``chunk_size``, the faster the calculation takes.
+        to compute the Jacobian is roughly ``t ~1/chunk_size`` with some baseline time,
+        so the larger the ``chunk_size``, the faster the calculation takes, at the cost
+        of requiring more memory.
         If None, it will default to the largest possible size i.e. ``dim_x``
 
     """
@@ -71,10 +73,8 @@ def __init__(
         ), "members of ObjectiveFunction should be instances of _Objective"
         assert use_jit in {True, False}
         assert deriv_mode in {"auto", "batched", "looped", "blocked"}
-        if chunk_size is None:
-            self.chunk_size = objectives[0].chunk_size
-        else:
-            self.chunk_size = chunk_size
+
+        self.chunk_size = chunk_size
 
         self._objectives = objectives
         self._use_jit = use_jit
diff --git a/tests/test_examples.py b/tests/test_examples.py
index b19684830e..c3b7d29ad6 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -175,7 +175,7 @@ def test_1d_optimization():
     eq = get("SOLOVEV")
     objective = ObjectiveFunction(AspectRatio(eq=eq, target=2.5))
     constraints = (
-        ForceBalance(eq=eq, chunk_size=50),
+        ForceBalance(eq=eq, chunk_size=None),
         FixBoundaryR(eq=eq),
         FixBoundaryZ(eq=eq, modes=eq.surface.Z_basis.modes[0:-1, :]),
         FixPressure(eq=eq),

From 7aca7ad9f5849bdb244eafc8f7f03d2596dbcbbe Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 22 Aug 2024 16:22:59 -0400
Subject: [PATCH 11/46] put chunk size in a few tests

---
 tests/test_examples.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/test_examples.py b/tests/test_examples.py
index c3b7d29ad6..4e8aa7ffba 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -175,7 +175,7 @@ def test_1d_optimization():
     eq = get("SOLOVEV")
     objective = ObjectiveFunction(AspectRatio(eq=eq, target=2.5))
     constraints = (
-        ForceBalance(eq=eq, chunk_size=None),
+        ForceBalance(eq=eq),
         FixBoundaryR(eq=eq),
         FixBoundaryZ(eq=eq, modes=eq.surface.Z_basis.modes[0:-1, :]),
         FixPressure(eq=eq),
@@ -201,6 +201,7 @@ def run_qh_step(n, eq):
             QuasisymmetryTwoTerm(eq=eq, helicity=(1, eq.NFP), grid=grid),
             AspectRatio(eq=eq, target=8, weight=1e2),
         ),
+        chunk_size=1000,
     )
     R_modes = np.vstack(
         (
@@ -301,9 +302,7 @@ def test_ATF_results(tmpdir_factory):
         spectral_indexing=eq0.spectral_indexing,
     )
     eqf = EquilibriaFamily.solve_continuation_automatic(
-        eq,
-        verbose=2,
-        checkpoint_path=output_dir.join("ATF.h5"),
+        eq, verbose=2, checkpoint_path=output_dir.join("ATF.h5"), chunk_size=500
     )
     eqf = load(output_dir.join("ATF.h5"))
     rho_err, theta_err = area_difference_desc(eq0, eqf[-1])

From 2239746f0cec95202942e1929325e6300059f050 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 22 Aug 2024 16:25:49 -0400
Subject: [PATCH 12/46] add chunk size arg to blocked test

---
 tests/test_objective_funs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index d664ac356a..c9073a2a97 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1238,8 +1238,8 @@ def test_derivative_modes():
     )
     obj2 = ObjectiveFunction(
         [
-            PlasmaVesselDistance(eq, surf),
-            MagneticWell(eq),
+            PlasmaVesselDistance(eq, surf, chunk_size=2),
+            MagneticWell(eq, chunk_size=3),
         ],
         deriv_mode="blocked",
         use_jit=False,

From 74e9d0c8ae0ae497279ed24f966e8dd63306f2b0 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 26 Aug 2024 10:33:58 -0400
Subject: [PATCH 13/46] add info about chunk_size to docs

---
 docs/adding_objectives.rst | 41 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index cffa3486c3..a36603ab89 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -70,6 +70,18 @@ A full example objective with comments describing the key points is given below:
             Collocation grid containing the nodes to evaluate at.
         name : str, optional
             Name of the objective function.
+        chunk_size : int, optional
+            If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
+            calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+            instead of all at once.  The memory usage of the Jacobian calculation is
+            linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+            less memory the Jacobian calculation will require (with some baseline memory
+            usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
+            with some baseline time, so the larger the ``chunk_size``, the faster the
+            calculation takes, at the cost of requiring more memory. A ``chunk_size``
+            of 1 corresponds to the least memory intensive,  but slowest method of
+            calculating the Jacobian.
+            If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
 
         """
 
@@ -88,6 +100,7 @@ A full example objective with comments describing the key points is given below:
             normalize_target=True,
             grid=None,
             name="QS triple product",
+            chunk_size=None,
         ):
             # we don't have to do much here, mostly just call ``super().__init__()``
             if target is None and bounds is None:
@@ -101,6 +114,7 @@ A full example objective with comments describing the key points is given below:
                 normalize=normalize,
                 normalize_target=normalize_target,
                 name=name,
+                chunk_size=chunk_size
             )
 
         def build(self, use_jit=True, verbose=1):
@@ -234,3 +248,30 @@ available for the ``loss_function`` kwarg are ``[None,"mean","min","max"]``, wit
 ``None`` meaning using the usual default objective cost, while ``"mean"`` takes the
 average of the raw objective values (before subtracting the target/bounds or
 normalization), ``"min"`` takes the minimum, and ``"max"`` takes the maximum.
+
+Reducing Memory Size of Objective Jacobian Calculation
+------------------------------------------------------
+
+During optimization, one of the most memory-intensive steps is the calculation of the Jacobian
+of the cost function. This memory cost comes from attempting to calculate the entire Jacobian
+matrix in one vectorized operation. However, the calculation can be tuned between fast but memory-intensive (default)
+and slower but memory-efficient with the ``chunk_size`` keyword argument. By default, this matters
+when creating the overall ``ObjectiveFunction`` to be used in the optimization (where by default ``deriv_mode="batched"``). The Jacobian is a
+matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jacobian is vectorized over
+the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``chunk_size`` argument splits up
+the vectorized computation into chunks of ``chunk_size`` columns at a time, allowing one to compute the Jacobian
+in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is
+linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the less memory the Jacobian calculation will
+require (with some baseline memory usage). The time to compute the Jacobian scales roughly as ``t ~ 1/chunk_size``
+with some baseline time, so the larger the ``chunk_size``, the faster the calculation,
+at the cost of requiring more memory. A ``chunk_size`` of 1 corresponds to the least memory-intensive,
+but slowest, method of calculating the Jacobian. If ``chunk_size=None``, it will default to the largest
+possible ``chunk_size``, i.e. ``obj.dim_x``.
+
+If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
+be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
+the ``chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
+For example, if ``obj1 = QuasisymmetryTripleProduct(eq, chunk_size=100)``, ``obj2 = MeanCurvature(eq, chunk_size=2000)``
+and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with
+``chunk_size=100`` for the quasisymmetry part and ``chunk_size=2000`` for the curvature part, and the full Jacobian
+will then be formed as a block diagonal matrix from the individual Jacobians of these two objectives.

From 9ad2688f7eddfa198741a328e3992b5bbaafab83 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 26 Aug 2024 13:21:55 -0400
Subject: [PATCH 14/46] change attribution statements

---
 desc/utils.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/desc/utils.py b/desc/utils.py
index cce9564342..f0e9a875ae 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -697,7 +697,15 @@ def broadcast_tree(tree_in, tree_out, dtype=int):
         raise ValueError("trees must be nested lists of dicts")
 
 
-# credit to _chunk_utils.py of netket package for below section
+# The following section of this code is a derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_chunk_utils.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+
 def _treeify(f):
     def _f(x, *args, **kwargs):
         return jax.tree_util.tree_map(lambda y: f(y, *args, **kwargs), x)
@@ -777,7 +785,13 @@ def chunk(x, chunk_size=None):
 
 ####
 
-# credit to _scanmap.py from netket package for below
+# The following section of this code is a derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_scanmap.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
 
 _tree_add = functools.partial(jax.tree_util.tree_map, jax.lax.add)
 _tree_zeros_like = functools.partial(
@@ -916,9 +930,13 @@ def f_(*args, **kwargs):
     return f_
 
 
-# credit to _vmap_chunked.py from netket package
-# taking the parts not meant to support sharding, as we do not
-# need that in DESC
+# The following section of this code is derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_vmap_chunked.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
 
 
 def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs):

From 41171c86442cadcad5af27d974797d644e6a19c7 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 26 Aug 2024 13:22:17 -0400
Subject: [PATCH 15/46] fix typo in  attribution statements

---
 desc/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/desc/utils.py b/desc/utils.py
index f0e9a875ae..834d4535e0 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -697,7 +697,7 @@ def broadcast_tree(tree_in, tree_out, dtype=int):
         raise ValueError("trees must be nested lists of dicts")
 
 
-# The following section of this code is a derived from the NetKet project
+# The following section of this code is derived from the NetKet project
 # https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
 # netket/jax/_chunk_utils.py
 #
@@ -785,7 +785,7 @@ def chunk(x, chunk_size=None):
 
 ####
 
-# The following section of this code is a derived from the NetKet project
+# The following section of this code is derived from the NetKet project
 # https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
 # netket/jax/_scanmap.py
 #

From 5d15de3fbfc9d3f9d5df05c957994058eed867d4 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Sat, 7 Sep 2024 22:22:19 -0400
Subject: [PATCH 16/46] add warnings

---
 desc/objectives/objective_funs.py | 21 +++++++++++++++++++++
 tests/test_objective_funs.py      | 10 ++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index cf245a2d44..2bc7dfaf66 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -18,6 +18,7 @@
     is_broadcastable,
     setdefault,
     unique_list,
+    warnif,
 )
 
 
@@ -150,6 +151,26 @@ def build(self, use_jit=None, verbose=1):
             self._scalar = False
 
         self._set_derivatives()
+        sub_obj_chunk_sizes = [obj.chunk_size for obj in self.objectives]
+        warnif(
+            np.any(sub_obj_chunk_sizes) and self._deriv_mode != "blocked",
+            UserWarning,
+            "'chunk_size' was passed into one or more sub-objectives, but the"
+            "ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
+            "'chunk_size' will be ignored in favor of the ObjectiveFunction's "
+            f"'chunk_size' of {self.chunk_size}."
+            " Specify 'blocked' deriv_mode if each sub-objective is desired to have a "
+            "different 'chunk_size' for its Jacobian computation.",
+        )
+        warnif(
+            self.chunk_size is not None and self._deriv_mode == "blocked",
+            UserWarning,
+            "'chunk_size' was passed into ObjectiveFunction, but the"
+            "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective "
+            "'chunk_size' are used to compute each sub-objective's Jacobian, "
+            "`ignoring the ObjectiveFunction's 'chunk_size'.",
+        )
+
         if not self.use_jit:
             self._unjit()
 
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 76cfb59ebe..e5e17679e8 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1284,7 +1284,7 @@ def test_derivative_modes():
     surf = FourierRZToroidalSurface()
     obj1 = ObjectiveFunction(
         [
-            PlasmaVesselDistance(eq, surf),
+            PlasmaVesselDistance(eq, surf, chunk_size=1),
             MagneticWell(eq),
         ],
         deriv_mode="batched",
@@ -1296,6 +1296,7 @@ def test_derivative_modes():
             MagneticWell(eq, chunk_size=3),
         ],
         deriv_mode="blocked",
+        chunk_size=10,
         use_jit=False,
     )
     obj3 = ObjectiveFunction(
@@ -1306,9 +1307,10 @@ def test_derivative_modes():
         deriv_mode="looped",
         use_jit=False,
     )
-
-    obj1.build()
-    obj2.build()
+    with pytest.warns(UserWarning, match="chunk_size"):
+        obj1.build()
+    with pytest.warns(UserWarning, match="chunk_size"):
+        obj2.build()
     obj3.build()
     x = obj1.x(eq, surf)
     g1 = obj1.grad(x)

From 9aca31fbcff14cab788b94f722ef1e01239a1076 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Sat, 7 Sep 2024 22:48:37 -0400
Subject: [PATCH 17/46] set default to be dim_x/4 for chunk_size

---
 desc/objectives/objective_funs.py | 16 +++++++++++++++-
 tests/test_objective_funs.py      |  6 +++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 2bc7dfaf66..de408f91f1 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -54,7 +54,7 @@ class ObjectiveFunction(IOAble):
         to compute the Jacobian is roughly ``t ~1/chunk_size` with some baseline time,
         so the larger the ``chunk_size``, the faster the calculation takes, at the cost
         of requiring more memory.
-        If None, it will default to the largest possible size i.e. ``dim_x``
+        If None, it will default to ``np.ceil(dim_x/4)``
 
     """
 
@@ -175,6 +175,20 @@ def build(self, use_jit=None, verbose=1):
             self._unjit()
 
         self._set_things()
+        if self.chunk_size is None and self._deriv_mode == "batched":
+            # set chunk_size to 1/4 of number columns of Jacobian
+            # as the default for batched deriv_mode
+            self.chunk_size = int(np.ceil(self.dim_x / 4))
+        if self._deriv_mode == "blocked":
+            # set chunk_size for each sub-objective that did not specify one
+            # to 1/4 of the summed dim_x of its things (rounded up),
+            # as the default for blocked deriv_mode
+            for obj in self.objectives:
+                obj.chunk_size = (
+                    int(np.ceil(sum(t.dim_x for t in obj.things) / 4))
+                    if obj.chunk_size is None
+                    else obj.chunk_size
+                )
 
         self._built = True
         timer.stop("Objective build")
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index e5e17679e8..9140ed4f9c 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1293,7 +1293,7 @@ def test_derivative_modes():
     obj2 = ObjectiveFunction(
         [
             PlasmaVesselDistance(eq, surf, chunk_size=2),
-            MagneticWell(eq, chunk_size=3),
+            MagneticWell(eq),
         ],
         deriv_mode="blocked",
         chunk_size=10,
@@ -1311,6 +1311,10 @@ def test_derivative_modes():
         obj1.build()
     with pytest.warns(UserWarning, match="chunk_size"):
         obj2.build()
+    # check that default size works for blocked
+    assert obj2.objectives[1].chunk_size == np.ceil(
+        sum(t.dim_x for t in obj2.objectives[1].things) / 4
+    )
     obj3.build()
     x = obj1.x(eq, surf)
     g1 = obj1.grad(x)

From 6f2bbdbfd58d1a68e5247d66cd9d443324f06958 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 09:15:09 +0900
Subject: [PATCH 18/46] fix warning causing test fail

---
 tests/test_examples.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_examples.py b/tests/test_examples.py
index ad52b6c139..4d3308d2a1 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -201,7 +201,6 @@ def run_qh_step(n, eq):
             QuasisymmetryTwoTerm(eq=eq, helicity=(1, eq.NFP), grid=grid),
             AspectRatio(eq=eq, target=8, weight=1e2),
         ),
-        chunk_size=1000,
     )
     R_modes = np.vstack(
         (
@@ -1041,7 +1040,7 @@ def test_freeb_vacuum():
         FixPsi(eq=eq),
     )
     objective = ObjectiveFunction(
-        VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True)
+        VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True), chunk_size=1000
     )
     eq, _ = eq.optimize(
         objective,

From f3b5f2561338848fcb4f3ebc846e02cbcbb32fbf Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 10:09:05 +0900
Subject: [PATCH 19/46] change chunk_size to jac_chunk_size, change docstring

---
 desc/continuation.py              |  33 ++--
 desc/objectives/_bootstrap.py     |  28 ++--
 desc/objectives/_coils.py         | 252 +++++++++++++++---------------
 desc/objectives/_equilibrium.py   | 168 ++++++++++----------
 desc/objectives/_free_boundary.py |  84 +++++-----
 desc/objectives/_generic.py       |  60 +++----
 desc/objectives/_geometry.py      | 224 +++++++++++++-------------
 desc/objectives/_omnigenity.py    | 140 ++++++++---------
 desc/objectives/_power_balance.py |  28 ++++
 desc/objectives/_profiles.py      | 112 ++++++-------
 desc/objectives/_stability.py     |  56 +++----
 desc/objectives/getters.py        |   8 +-
 desc/objectives/objective_funs.py |  83 +++++-----
 desc/utils.py                     |  71 +++++----
 docs/adding_objectives.rst        |  42 ++---
 tests/test_examples.py            |   5 +-
 tests/test_objective_funs.py      |  12 +-
 17 files changed, 722 insertions(+), 684 deletions(-)

diff --git a/desc/continuation.py b/desc/continuation.py
index 635fcb8765..e4d38ab72f 100644
--- a/desc/continuation.py
+++ b/desc/continuation.py
@@ -29,7 +29,7 @@ def _solve_axisym(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    chunk_size=None,
+    jac_chunk_size=None,
 ):
     """Solve initial axisymmetric case with adaptive step sizing."""
     timer = Timer()
@@ -101,7 +101,7 @@ def _solve_axisym(
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
         objective_i = get_equilibrium_objective(
-            eq=eqi, mode=objective, chunk_size=chunk_size
+            eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size
         )
 
         if verbose:
@@ -199,7 +199,7 @@ def _add_pressure(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    chunk_size=None,
+    jac_chunk_size=None,
 ):
     """Add pressure with adaptive step sizing."""
     timer = Timer()
@@ -229,7 +229,7 @@ def _add_pressure(
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
         objective_i = get_equilibrium_objective(
-            eq=eqi, mode=objective, chunk_size=chunk_size
+            eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size
         )
 
         if verbose:
@@ -330,7 +330,7 @@ def _add_shaping(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    chunk_size=None,
+    jac_chunk_size=None,
 ):
     """Add 3D shaping with adaptive step sizing."""
     timer = Timer()
@@ -361,7 +361,7 @@ def _add_shaping(
 
         constraints_i = get_fixed_boundary_constraints(eq=eqi)
         objective_i = get_equilibrium_objective(
-            eq=eqi, mode=objective, chunk_size=chunk_size
+            eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size
         )
 
         if verbose:
@@ -460,7 +460,7 @@ def solve_continuation_automatic(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    chunk_size=None,
+    jac_chunk_size=None,
     **kwargs,
 ):
     """Solve for an equilibrium using an automatic continuation method.
@@ -539,7 +539,7 @@ def solve_continuation_automatic(  # noqa: C901
         maxiter,
         verbose,
         checkpoint_path,
-        chunk_size=chunk_size,
+        jac_chunk_size=jac_chunk_size,
     )
 
     # for zero current we want to do shaping before pressure to avoid having a
@@ -558,7 +558,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
         eqfam = _add_pressure(
@@ -574,7 +574,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     # for other cases such as fixed iota or nonzero current we do pressure first
@@ -593,7 +593,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
         eqfam = _add_shaping(
@@ -609,7 +609,7 @@ def solve_continuation_automatic(  # noqa: C901
             maxiter,
             verbose,
             checkpoint_path,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
     eq.params_dict = eqfam[-1].params_dict
     eqfam[-1] = eq
@@ -640,7 +640,7 @@ def solve_continuation(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    chunk_size=None,
+    jac_chunk_size=None,
 ):
     """Solve for an equilibrium by continuation method.
 
@@ -702,7 +702,7 @@ def solve_continuation(  # noqa: C901
     if not isinstance(optimizer, Optimizer):
         optimizer = Optimizer(optimizer)
     objective_i = get_equilibrium_objective(
-        eq=eqfam[0], mode=objective, chunk_size=chunk_size
+        eq=eqfam[0], mode=objective, jac_chunk_size=jac_chunk_size
     )
     constraints_i = get_fixed_boundary_constraints(eq=eqfam[0])
 
@@ -751,7 +751,7 @@ def solve_continuation(  # noqa: C901
             # TODO: pass Jx if available
             eqp = eqfam[ii - 1].copy()
             objective_i = get_equilibrium_objective(
-                eq=eqp, mode=objective, chunk_size=chunk_size
+                eq=eqp, mode=objective, jac_chunk_size=jac_chunk_size
             )
             constraints_i = get_fixed_boundary_constraints(eq=eqp)
             eqp.change_resolution(**eqi.resolution)
@@ -771,9 +771,8 @@ def solve_continuation(  # noqa: C901
             stop = True
 
         if not stop:
-            # TODO: add ability to rebind objectives
             objective_i = get_equilibrium_objective(
-                eq=eqi, mode=objective, chunk_size=chunk_size
+                eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size
             )
             constraints_i = get_fixed_boundary_constraints(eq=eqi)
             eqi.solve(
diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py
index e76b56cca5..0933fc14be 100644
--- a/desc/objectives/_bootstrap.py
+++ b/desc/objectives/_bootstrap.py
@@ -66,18 +66,18 @@ class BootstrapRedlConsistency(_Objective):
         or quasi-axisymmetry; set to +/-NFP for quasi-helical symmetry.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
     """
 
@@ -98,7 +98,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="Bootstrap current self-consistency (Redl)",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -116,7 +116,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py
index c88427bb8c..565bf3bdd8 100644
--- a/desc/objectives/_coils.py
+++ b/desc/objectives/_coils.py
@@ -59,18 +59,18 @@ class _CoilObjective(_Objective):
         If a list, must have the same structure as coil.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -88,7 +88,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name=None,
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         self._grid = grid
         self._data_keys = data_keys
@@ -103,7 +103,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):  # noqa:C901
@@ -269,18 +269,18 @@ class CoilLength(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -301,7 +301,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil length",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2 * np.pi
@@ -318,7 +318,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -409,18 +409,18 @@ class CoilCurvature(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -441,7 +441,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil curvature",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, 1)
@@ -458,7 +458,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -544,18 +544,18 @@ class CoilTorsion(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -576,7 +576,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil torsion",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -593,7 +593,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -679,18 +679,18 @@ class CoilCurrentLength(CoilLength):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -711,7 +711,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil current length",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -727,7 +727,7 @@ def __init__(
             deriv_mode=deriv_mode,
             grid=grid,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -822,18 +822,18 @@ class CoilSetMinDistance(_Objective):
         If a list, must have the same structure as coils.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to a ``jac_chunk_size`` of
+        ``np.ceil(dim_x/4)``
 
 
     """
@@ -854,7 +854,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil-coil minimum distance",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         from desc.coils import CoilSet
 
@@ -876,7 +876,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1011,18 +1011,18 @@ class PlasmaCoilSetMinDistance(_Objective):
         False by default, so that self.things = [coil, eq].
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1047,7 +1047,7 @@ def __init__(
         eq_fixed=False,
         coils_fixed=False,
         name="plasma-coil minimum distance",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1073,7 +1073,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1256,18 +1256,18 @@ class QuadraticFlux(_Objective):
         plasma currents) is set to zero.
     name : str
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
 
@@ -1291,7 +1291,7 @@ def __init__(
         field_grid=None,
         vacuum=False,
         name="Quadratic flux",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1310,7 +1310,7 @@ def __init__(
             normalize=normalize,
             normalize_target=normalize_target,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1480,18 +1480,18 @@ class ToroidalFlux(_Objective):
         zeta=jnp.array(0.0), NFP=eq.NFP).
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
 
@@ -1515,7 +1515,7 @@ def __init__(
         field_grid=None,
         eval_grid=None,
         name="toroidal-flux",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = eq.Psi
@@ -1535,7 +1535,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index 352674d5e2..bb2c4dc7a7 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -63,18 +63,18 @@ class ForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
 
@@ -95,7 +95,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -110,7 +110,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -249,18 +249,18 @@ class ForceBalanceAnisotropic(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -282,7 +282,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force-anisotropic",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -297,7 +297,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -422,18 +422,18 @@ class RadialForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -455,7 +455,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="radial force",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -470,7 +470,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -595,18 +595,18 @@ class HelicalForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -628,7 +628,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="helical force",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -643,7 +643,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -766,18 +766,18 @@ class Energy(_Objective):
         Adiabatic (compressional) index. Default = 0.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -802,7 +802,7 @@ def __init__(
         grid=None,
         gamma=0,
         name="energy",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -818,7 +818,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -948,18 +948,18 @@ class CurrentDensity(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -981,7 +981,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="current density",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -996,7 +996,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py
index fb88588121..2a923f03ea 100644
--- a/desc/objectives/_free_boundary.py
+++ b/desc/objectives/_free_boundary.py
@@ -73,18 +73,18 @@ class VacuumBoundaryError(_Objective):
         be fixed. For single stage optimization, should be False (default).
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -110,7 +110,7 @@ def __init__(
         field_grid=None,
         field_fixed=False,
         name="Vacuum boundary error",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -133,7 +133,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -425,18 +425,18 @@ class BoundaryError(_Objective):
         less memory.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
 
@@ -483,7 +483,7 @@ def __init__(
         field_fixed=False,
         loop=True,
         name="Boundary error",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -510,7 +510,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -897,18 +897,18 @@ class BoundaryErrorNESTOR(_Objective):
         reverse mode and forward over reverse mode respectively.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -936,7 +936,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name="NESTOR Boundary",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -956,7 +956,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py
index 6147ccf04f..9dc7b05f6f 100644
--- a/desc/objectives/_generic.py
+++ b/desc/objectives/_generic.py
@@ -57,18 +57,18 @@ class GenericObjective(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t = t0 + t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation runs, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -88,7 +88,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="generic",
-        chunk_size=None,
+        jac_chunk_size=None,
         **kwargs,
     ):
         errorif(
@@ -111,7 +111,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
         self._p = _parse_parameterization(thing)
         self._scalar = not bool(data_index[self._p][self.f]["dim"])
@@ -240,7 +240,7 @@ def __init__(
         normalize=False,
         normalize_target=False,
         name="custom linear",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -253,7 +253,7 @@ def __init__(
             normalize=normalize,
             normalize_target=normalize_target,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=False, verbose=1):
@@ -359,18 +359,18 @@ class ObjectiveFromUser(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     Examples
@@ -408,7 +408,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="custom",
-        chunk_size=None,
+        jac_chunk_size=None,
         **kwargs,
     ):
         errorif(
@@ -431,7 +431,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
         self._p = _parse_parameterization(thing)
 
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index 7e63c67b82..8eb7d9b8eb 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -53,18 +53,18 @@ class AspectRatio(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -85,7 +85,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="aspect ratio",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2
@@ -100,7 +100,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -230,18 +230,18 @@ class Elongation(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -262,7 +262,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="elongation",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -277,7 +277,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -406,18 +406,18 @@ class Volume(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -438,7 +438,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="volume",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -453,7 +453,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -631,18 +631,18 @@ class PlasmaVesselDistance(_Objective):
         more accurate approximation of the true min.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
 
@@ -669,7 +669,7 @@ def __init__(
         softmin_alpha=1.0,
         name="plasma-vessel distance",
         use_signed_distance=False,
-        chunk_size=None,
+        jac_chunk_size=None,
         **kwargs,
     ):
         if target is None and bounds is None:
@@ -709,7 +709,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -984,18 +984,18 @@ class MeanCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1016,7 +1016,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="mean curvature",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -1031,7 +1031,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1159,18 +1159,18 @@ class PrincipalCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1191,7 +1191,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="principal-curvature",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -1206,7 +1206,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1329,18 +1329,18 @@ class BScaleLength(_Objective):
         ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1361,7 +1361,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="B-scale-length",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1376,7 +1376,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1495,18 +1495,18 @@ class GoodCoordinates(_Objective):
         Collocation grid containing the nodes to evaluate at.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1528,7 +1528,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coordinate goodness",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1544,7 +1544,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py
index fb35c37f6b..7c72d7f3d3 100644
--- a/desc/objectives/_omnigenity.py
+++ b/desc/objectives/_omnigenity.py
@@ -57,18 +57,18 @@ class QuasisymmetryBoozer(_Objective):
         Toroidal resolution of Boozer transformation. Default = 2 * eq.N.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -91,7 +91,7 @@ def __init__(
         M_booz=None,
         N_booz=None,
         name="QS Boozer",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -109,7 +109,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
         self._print_value_fmt = "Quasi-symmetry ({},{}) Boozer error: ".format(
@@ -284,18 +284,18 @@ class QuasisymmetryTwoTerm(_Objective):
         Type of quasi-symmetry (M, N).
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the higher the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -317,7 +317,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="QS two-term",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -333,7 +333,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
         self._print_value_fmt = "Quasi-symmetry ({},{}) two-term error: ".format(
@@ -484,18 +484,18 @@ class QuasisymmetryTripleProduct(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the higher the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -516,7 +516,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="QS triple product",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -531,7 +531,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -677,18 +677,18 @@ class Omnigenity(_Objective):
         associated data are re-computed at every iteration (Default).
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -716,7 +716,7 @@ def __init__(
         eq_fixed=False,
         field_fixed=False,
         name="omnigenity",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -748,7 +748,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -1027,18 +1027,18 @@ class Isodynamicity(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -1059,7 +1059,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Isodynamicity",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1074,7 +1074,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_power_balance.py b/desc/objectives/_power_balance.py
index 299b248358..63faf689c7 100644
--- a/desc/objectives/_power_balance.py
+++ b/desc/objectives/_power_balance.py
@@ -56,6 +56,18 @@ class FusionPower(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
 
@@ -76,6 +88,7 @@ def __init__(
         fuel="DT",
         grid=None,
         name="fusion power",
+        jac_chunk_size=None,
     ):
         errorif(
             fuel not in ["DT"], ValueError, f"fuel must be one of ['DT'], got {fuel}."
@@ -94,6 +107,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -241,6 +255,18 @@ class HeatingPowerISS04(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
 
@@ -262,6 +288,7 @@ def __init__(
         gamma=0,
         grid=None,
         name="heating power",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -278,6 +305,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py
index bf72f62107..de9926d3ab 100644
--- a/desc/objectives/_profiles.py
+++ b/desc/objectives/_profiles.py
@@ -53,18 +53,18 @@ class Pressure(_Objective):
         Defaults to ``LinearGrid(L=eq.L_grid)``.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -85,7 +85,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="pressure",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -100,7 +100,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -226,18 +226,18 @@ class RotationalTransform(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -258,7 +258,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="rotational transform",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -273,7 +273,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -412,18 +412,18 @@ class Shear(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -444,7 +444,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="shear",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -459,7 +459,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -594,18 +594,18 @@ class ToroidalCurrent(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -626,7 +626,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="toroidal current",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -641,7 +641,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index 0f4336f016..1520b73e20 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -63,18 +63,18 @@ class MercierStability(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -95,7 +95,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Mercier Stability",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -110,7 +110,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
@@ -259,18 +259,18 @@ class MagneticWell(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    chunk_size : int, optional
-        If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-        calculate the Jacobian for this objective ``chunk_size`` columns at a time,
-        instead of all at once.  The memory usage of the Jacobian calculation is
-        linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
-        less memory the Jacobian calculation will require (with some baseline memory
-        usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-        with some baseline time, so the larger the ``chunk_size``, the faster the
-        calculation takes, at the cost of requiring more memory. A ``chunk_size``
-        of 1 corresponds to the least memory intensive,  but slowest method of
-        calculating the Jacobian.
-        If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+    jac_chunk_size : int, optional
+        Will calculate the Jacobian for this objective ``jac_chunk_size``
+        columns at a time, instead of all at once.  The memory usage of the
+        Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+        the smaller the chunk size, the less memory the Jacobian calculation
+        will require (with some baseline memory usage). The time to compute the
+        Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+        ``jac_chunk_size``, the faster the calculation takes, at the cost of
+        requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+        memory intensive,  but slowest method of calculating the Jacobian.
+        If None, it will default to the largest possible
+        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
     """
@@ -291,7 +291,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Magnetic Well",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -306,7 +306,7 @@ def __init__(
             loss_function=loss_function,
             deriv_mode=deriv_mode,
             name=name,
-            chunk_size=chunk_size,
+            jac_chunk_size=jac_chunk_size,
         )
 
     def build(self, use_jit=True, verbose=1):
diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index 5875176ca4..a0cc959676 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -44,7 +44,9 @@
 }
 
 
-def get_equilibrium_objective(eq, mode="force", normalize=True, **kwargs):
+def get_equilibrium_objective(
+    eq, mode="force", normalize=True, jac_chunk_size=None, **kwargs
+):
     """Get the objective function for a typical force balance equilibrium problem.
 
     Parameters
@@ -57,6 +59,8 @@ def get_equilibrium_objective(eq, mode="force", normalize=True, **kwargs):
         for minimizing MHD energy.
     normalize : bool
         Whether to normalize units of objective.
+    jac_chunk_size : int, optional
+        Passed as ``jac_chunk_size`` to the returned ``ObjectiveFunction``.
 
     Returns
     -------
@@ -76,7 +80,7 @@ def get_equilibrium_objective(eq, mode="force", normalize=True, **kwargs):
         objectives = (RadialForceBalance(**kwargs), HelicalForceBalance(**kwargs))
     else:
         raise ValueError("got an unknown equilibrium objective type '{}'".format(mode))
-    return ObjectiveFunction(objectives, chunk_size=kwargs.get("chunk_size", None))
+    return ObjectiveFunction(objectives, jac_chunk_size=jac_chunk_size)
 
 
 def get_fixed_axis_constraints(eq, profiles=True, normalize=True):
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index de408f91f1..87b390db92 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -34,26 +34,26 @@ class ObjectiveFunction(IOAble):
     deriv_mode : {"auto", "batched", "blocked", "looped"}
         Method for computing Jacobian matrices. "batched" uses forward mode, applied to
         the entire objective at once, and is generally the fastest for vector valued
-        objectives, though most memory intensive. "blocked" builds the Jacobian for each
-        objective separately, using each objective's preferred AD mode (and
-        each objective's `chunk_size`). Generally the most efficient option when mixing
-        scalar and vector valued objectives. "looped" uses forward mode jacobian vector
-        products in a loop to build the Jacobian column by column. Generally the
+        objectives, though most memory intensive. "blocked" builds the Jacobian for
+        each objective separately, using each objective's preferred AD mode (and
+        each objective's `jac_chunk_size`). Generally the most efficient option when
+        mixing scalar and vector valued objectives. "looped" uses forward mode jacobian
+        vector products in a loop to build the Jacobian column by column. Generally the
         slowest, but most memory efficient.
         "auto" defaults to "batched" if all sub-objectives are set to "fwd",
         otherwise "blocked".
     name : str
         Name of the objective function.
-    chunk_size : int, optional
+    jac_chunk_size : int, optional
         If `"batched"` deriv_mode is used, will calculate the Jacobian
-        ``chunk_size`` columns at a time, instead of all at once. A
-        ``chunk_size`` of 1 is equivalent to using `"looped"` deriv_mode.
-        The memory usage of the Jacobian calculation is linearly proportional to
-        ``chunk_size``: the smaller the ``chunk_size``, the less memory the Jacobian
-        calculation will require (with some baseline memory usage). The time it takes
-        to compute the Jacobian is roughly ``t ~1/chunk_size` with some baseline time,
-        so the larger the ``chunk_size``, the faster the calculation takes, at the cost
-        of requiring more memory.
+        ``jac_chunk_size`` columns at a time, instead of all at once. A
+        ``jac_chunk_size`` of 1 is equivalent to using `"looped"` deriv_mode.
+        The memory usage of the Jacobian calculation is roughly
+        ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size,
+        the less memory the Jacobian calculation will require (with some baseline
+        memory usage). The time it takes to compute the Jacobian is roughly
+        ``t= t0 + t1/jac_chunk_size``, so the larger the ``jac_chunk_size``, the faster
+        the calculation takes, at the cost of requiring more memory.
         If None, it will default to ``np.ceil(dim_x/4)``
 
     """
@@ -66,7 +66,7 @@ def __init__(
         use_jit=True,
         deriv_mode="auto",
         name="ObjectiveFunction",
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if not isinstance(objectives, (tuple, list)):
             objectives = (objectives,)
@@ -76,8 +76,7 @@ def __init__(
         assert use_jit in {True, False}
         assert deriv_mode in {"auto", "batched", "looped", "blocked"}
 
-        self.chunk_size = chunk_size
-
+        self._jac_chunk_size = jac_chunk_size
         self._objectives = objectives
         self._use_jit = use_jit
         self._deriv_mode = deriv_mode
@@ -151,43 +150,43 @@ def build(self, use_jit=None, verbose=1):
             self._scalar = False
 
         self._set_derivatives()
-        sub_obj_chunk_sizes = [obj.chunk_size for obj in self.objectives]
+        sub_obj_jac_chunk_sizes = [obj._jac_chunk_size for obj in self.objectives]
         warnif(
-            np.any(sub_obj_chunk_sizes) and self._deriv_mode != "blocked",
+            np.any(sub_obj_jac_chunk_sizes) and self._deriv_mode != "blocked",
             UserWarning,
-            "'chunk_size' was passed into one or more sub-objectives, but the"
+            "'jac_chunk_size' was passed into one or more sub-objectives, but the"
             "ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
-            "'chunk_size' will be ignored in favor of the ObjectiveFunction's "
-            f"'chunk_size' of {self.chunk_size}."
+            "'jac_chunk_size' will be ignored in favor of the ObjectiveFunction's "
+            f"'jac_chunk_size' of {self._jac_chunk_size}."
             " Specify 'blocked' deriv_mode if each sub-objective is desired to have a "
-            "different 'chunk_size' for its Jacobian computation.",
+            "different 'jac_chunk_size' for its Jacobian computation.",
         )
         warnif(
-            self.chunk_size is not None and self._deriv_mode == "blocked",
+            self._jac_chunk_size is not None and self._deriv_mode == "blocked",
             UserWarning,
-            "'chunk_size' was passed into ObjectiveFunction, but the"
+            "'jac_chunk_size' was passed into ObjectiveFunction, but the"
             "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective "
-            "'chunk_size' are used to compute each sub-objective's Jacobian, "
-            "`ignoring the ObjectiveFunction's 'chunk_size'.",
+            "'jac_chunk_size' are used to compute each sub-objective's Jacobian, "
+            "`ignoring the ObjectiveFunction's 'jac_chunk_size'.",
         )
 
         if not self.use_jit:
             self._unjit()
 
         self._set_things()
-        if self.chunk_size is None and self._deriv_mode == "batched":
-            # set chunk_size to 1/4 of number columns of Jacobian
+        if self._jac_chunk_size is None and self._deriv_mode == "batched":
+            # set jac_chunk_size to 1/4 of number columns of Jacobian
             # as the default for batched deriv_mode
-            self.chunk_size = int(np.ceil(self.dim_x / 4))
+            self._jac_chunk_size = int(np.ceil(self.dim_x / 4))
         if self._deriv_mode == "blocked":
-            # set chunk_size for each sub-objective
+            # set jac_chunk_size for each sub-objective
             # to 1/4 of number columns of Jacobian
             # as the default for batched deriv_mode
             for obj in self.objectives:
-                obj.chunk_size = (
+                obj._jac_chunk_size = (
                     int(np.ceil(sum(t.dim_x for t in obj.things) / 4))
-                    if obj.chunk_size is None
-                    else obj.chunk_size
+                    if obj._jac_chunk_size is None
+                    else obj._jac_chunk_size
                 )
 
         self._built = True
@@ -558,19 +557,21 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         if len(v) == 1:
             jvpfun = lambda dx: Derivative.compute_jvp(fun, 0, dx, x)
             return batched_vectorize(
-                jvpfun, signature="(n)->(k)", chunk_size=self.chunk_size
+                jvpfun, signature="(n)->(k)", jac_chunk_size=self._jac_chunk_size
             )(v[0])
         elif len(v) == 2:
             jvpfun = lambda dx1, dx2: Derivative.compute_jvp2(fun, 0, 0, dx1, dx2, x)
             return batched_vectorize(
-                jvpfun, signature="(n),(n)->(k)", chunk_size=self.chunk_size
+                jvpfun, signature="(n),(n)->(k)", jac_chunk_size=self._jac_chunk_size
             )(v[0], v[1])
         elif len(v) == 3:
             jvpfun = lambda dx1, dx2, dx3: Derivative.compute_jvp3(
                 fun, 0, 0, 0, dx1, dx2, dx3, x
             )
             return batched_vectorize(
-                jvpfun, signature="(n),(n),(n)->(k)", chunk_size=self.chunk_size
+                jvpfun,
+                signature="(n),(n),(n)->(k)",
+                jac_chunk_size=self._jac_chunk_size,
             )(v[0], v[1], v[2])
         else:
             raise NotImplementedError("Cannot compute JVP higher than 3rd order.")
@@ -908,7 +909,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name=None,
-        chunk_size=None,
+        jac_chunk_size=None,
     ):
         if self._scalar:
             assert self._coordinates == ""
@@ -919,7 +920,7 @@ def __init__(
         assert (bounds is None) or (target is None), "Cannot use both bounds and target"
         assert loss_function in [None, "mean", "min", "max"]
         assert deriv_mode in {"auto", "fwd", "rev"}
-        self.chunk_size = chunk_size
+        self._jac_chunk_size = jac_chunk_size
 
         self._target = target
         self._bounds = bounds
@@ -1160,7 +1161,9 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         fun = lambda *x: getattr(self, op)(*x, constants=constants)
         jvpfun = lambda *dx: Derivative.compute_jvp(fun, tuple(range(len(x))), dx, *x)
         sig = ",".join(f"(n{i})" for i in range(len(x))) + "->(k)"
-        return batched_vectorize(jvpfun, signature=sig, chunk_size=self.chunk_size)(*v)
+        return batched_vectorize(
+            jvpfun, signature=sig, jac_chunk_size=self._jac_chunk_size
+        )(*v)
 
     @jit
     def jvp_scaled(self, v, x, constants=None):
diff --git a/desc/utils.py b/desc/utils.py
index bf609b700e..c3d0d014cb 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -727,28 +727,29 @@ def _unchunk(x):
 
 
 @_treeify
-def _chunk(x, chunk_size=None):
-    # chunk_size=None -> add just a dummy chunk dimension, same as np.expand_dims(x, 0)
+def _chunk(x, jac_chunk_size=None):
+    # jac_chunk_size=None -> add just a dummy chunk dimension,
+    #  same as np.expand_dims(x, 0)
     if x.ndim == 0:
         raise ValueError("x cannot be chunked as it has 0 dimensions.")
     n = x.shape[0]
-    if chunk_size is None:
-        chunk_size = n
+    if jac_chunk_size is None:
+        jac_chunk_size = n
 
-    n_chunks, residual = divmod(n, chunk_size)
+    n_chunks, residual = divmod(n, jac_chunk_size)
     if residual != 0:
         raise ValueError(
-            "The first dimension of x must be divisible by chunk_size."
-            + f"\n            Got x.shape={x.shape} but chunk_size={chunk_size}."
+            "The first dimension of x must be divisible by jac_chunk_size."
+            + f"\n        Got x.shape={x.shape} but jac_chunk_size={jac_chunk_size}."
         )
-    return x.reshape((n_chunks, chunk_size) + x.shape[1:])
+    return x.reshape((n_chunks, jac_chunk_size) + x.shape[1:])
 
 
-def _chunk_size(x):
+def _jac_chunk_size(x):
     b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
     if len(b) != 1:
         raise ValueError(
-            "The arrays in x have inconsistent chunk_size or number of chunks"
+            "The arrays in x have inconsistent jac_chunk_size or number of chunks"
         )
     return b.pop()[1]
 
@@ -768,27 +769,27 @@ def unchunk(x_chunked):
 
     """
     return _unchunk(x_chunked), functools.partial(
-        _chunk, chunk_size=_chunk_size(x_chunked)
+        _chunk, jac_chunk_size=_jac_chunk_size(x_chunked)
     )
 
 
-def chunk(x, chunk_size=None):
+def chunk(x, jac_chunk_size=None):
     """Split an array (or a pytree of arrays) into chunks along the first axis.
 
     Parameters
     ----------
         x: an array (or pytree of arrays)
-        chunk_size: an integer or None (default)
-            The first axis in x must be a multiple of chunk_size
+        jac_chunk_size: an integer or None (default)
+            The first axis in x must be a multiple of jac_chunk_size
 
     Returns
     -------
     (x_chunked, unchunk_fn): tuple
-        - x_chunked is x reshaped to (-1, chunk_size)+x.shape[1:]
-          if chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
+        - x_chunked is x reshaped to (-1, jac_chunk_size)+x.shape[1:]
+          if jac_chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
         - unchunk_fn is a function which restores x given x_chunked
     """
-    return _chunk(x, chunk_size), _unchunk
+    return _chunk(x, jac_chunk_size), _unchunk
 
 
 ####
@@ -947,11 +948,11 @@ def f_(*args, **kwargs):
 # Licensed under the Apache License, Version 2.0 (the "License");
 
 
-def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs):
+def _eval_fun_in_chunks(vmapped_fun, jac_chunk_size, argnums, *args, **kwargs):
     n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0]
-    n_chunks, n_rest = divmod(n_elements, chunk_size)
+    n_chunks, n_rest = divmod(n_elements, jac_chunk_size)
 
-    if n_chunks == 0 or chunk_size >= n_elements:
+    if n_chunks == 0 or jac_chunk_size >= n_elements:
         y = vmapped_fun(*args, **kwargs)
     else:
         # split inputs
@@ -959,7 +960,7 @@ def _get_chunks(x):
             x_chunks = jax.tree_util.tree_map(
                 lambda x_: x_[: n_elements - n_rest, ...], x
             )
-            x_chunks = _chunk(x_chunks, chunk_size)
+            x_chunks = _chunk(x_chunks, jac_chunk_size)
             return x_chunks
 
         def _get_rest(x):
@@ -989,16 +990,16 @@ def _get_rest(x):
 
 def _chunk_vmapped_function(
     vmapped_fun: Callable,
-    chunk_size: Optional[int],
+    jac_chunk_size: Optional[int],
     argnums=0,
 ) -> Callable:
     """Takes a vmapped function and computes it in chunks."""
-    if chunk_size is None:
+    if jac_chunk_size is None:
         return vmapped_fun
 
     if isinstance(argnums, int):
         argnums = (argnums,)
-    return functools.partial(_eval_fun_in_chunks, vmapped_fun, chunk_size, argnums)
+    return functools.partial(_eval_fun_in_chunks, vmapped_fun, jac_chunk_size, argnums)
 
 
 def _parse_in_axes(in_axes):
@@ -1018,7 +1019,7 @@ def apply_chunked(
     f: Callable,
     in_axes=0,
     *,
-    chunk_size: Optional[int],
+    jac_chunk_size: Optional[int],
 ) -> Callable:
     """Compute f in smaller chunks over axis 0.
 
@@ -1045,14 +1046,14 @@ def apply_chunked(
     ----------
         f: A function that satisfies the condition above
         in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        chunk_size: The maximum size of the chunks to be used. If it is `None`,
+        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
            chunking is disabled
 
     """
     _, argnums = _parse_in_axes(in_axes)
     return _chunk_vmapped_function(
         f,
-        chunk_size,
+        jac_chunk_size,
         argnums,
     )
 
@@ -1061,7 +1062,7 @@ def vmap_chunked(
     f: Callable,
     in_axes=0,
     *,
-    chunk_size: Optional[int],
+    jac_chunk_size: Optional[int],
 ) -> Callable:
     """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
 
@@ -1069,7 +1070,7 @@ def vmap_chunked(
 
     .. code-block:: python
 
-        nk.jax.apply_chunked(jax.vmap(f, in_axes), in_axes, chunk_size)
+        nk.jax.apply_chunked(jax.vmap(f, in_axes), in_axes, jac_chunk_size)
 
     Some limitations to `in_axes` apply.
 
@@ -1077,7 +1078,7 @@ def vmap_chunked(
     ----------
         f: The function to be vectorised.
         in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        chunk_size: The maximum size of the chunks to be used. If it is `None`,
+        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
             chunking is disabled
 
 
@@ -1087,10 +1088,12 @@ def vmap_chunked(
     """
     in_axes, argnums = _parse_in_axes(in_axes)
     vmapped_fun = jax.vmap(f, in_axes=in_axes)
-    return _chunk_vmapped_function(vmapped_fun, chunk_size, argnums)
+    return _chunk_vmapped_function(vmapped_fun, jac_chunk_size, argnums)
 
 
-def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None):
+def batched_vectorize(
+    pyfunc, *, excluded=frozenset(), signature=None, jac_chunk_size=None
+):
     """Define a vectorized function with broadcasting and batching.
 
     below is taken from JAX
@@ -1120,7 +1123,7 @@ def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_siz
         provided, ``pyfunc`` will be called with (and expected to return) arrays
         with shapes given by the size of corresponding core dimensions. By
         default, pyfunc is assumed to take scalars arrays as input and output.
-        chunk_size: the size of the batches to pass to vmap. if 1, will only
+        jac_chunk_size: the size of the batches to pass to vmap. if 1, will only
 
     Returns
     -------
@@ -1202,7 +1205,7 @@ def wrapped(*args, **kwargs):
             else:
                 # change the vmap here to chunked_vmap
                 vectorized_func = vmap_chunked(
-                    vectorized_func, in_axes, chunk_size=chunk_size
+                    vectorized_func, in_axes, jac_chunk_size=jac_chunk_size
                 )
         result = vectorized_func(*squeezed_args)
 
diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index 04bebcf79b..d97c31fbfa 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -70,18 +70,18 @@ A full example objective with comments describing the key points is given below:
             Collocation grid containing the nodes to evaluate at.
         name : str, optional
             Name of the objective function.
-        chunk_size : int, optional
+        jac_chunk_size : int, optional
             If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-            calculate the Jacobian for this objective ``chunk_size`` columns at a time,
+            calculate the Jacobian for this objective ``jac_chunk_size`` columns at a time,
             instead of all at once.  The memory usage of the Jacobian calculation is
-            linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the
+            linearly proportional to ``jac_chunk_size``: the smaller the ``jac_chunk_size``, the
             less memory the Jacobian calculation will require (with some baseline memory
-            usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-            with some baseline time, so the larger the ``chunk_size``, the faster the
-            calculation takes, at the cost of requiring more memory. A ``chunk_size``
+            usage). The time to compute the Jacobian is roughly ``t ~1/jac_chunk_size``
+            with some baseline time, so the larger the ``jac_chunk_size``, the faster the
+            calculation takes, at the cost of requiring more memory. A ``jac_chunk_size``
             of 1 corresponds to the least memory intensive,  but slowest method of
             calculating the Jacobian.
-            If None, it will default to the largest possible `chunk_size` i.e. ``dim_x``
+            If None, it will default to the largest possible `jac_chunk_size` i.e. ``dim_x``
 
         """
 
@@ -100,7 +100,7 @@ A full example objective with comments describing the key points is given below:
             normalize_target=True,
             grid=None,
             name="QS triple product",
-            chunk_size=None,
+            jac_chunk_size=None,
         ):
             # we don't have to do much here, mostly just call ``super().__init__()``
             if target is None and bounds is None:
@@ -114,7 +114,7 @@ A full example objective with comments describing the key points is given below:
                 normalize=normalize,
                 normalize_target=normalize_target,
                 name=name,
-                chunk_size=chunk_size
+                jac_chunk_size=jac_chunk_size
             )
 
         def build(self, use_jit=True, verbose=1):
@@ -255,23 +255,23 @@ Reducing Memory Size of Objective Jacobian Calculation
 During optimization, one of the most memory-intensive steps is the calculation of the Jacobian
 of the cost function. This memory cost comes from attempting to calculate the entire Jacobian
 matrix in one vectorized operation. However, this can be tuned between high memory usage but quick (default)
-and low memory usage but slower with the ``chunk_size`` keyword argument. By default, where this matters
+and low memory usage but slower with the ``jac_chunk_size`` keyword argument. By default, where this matters
 is when creating the overall ``ObjectiveFunction`` to be used in the optimization (where by default ``deriv_mode="batched"``). The Jacobian is a
 matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jacobian is vectorized over
-the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``chunk_size`` attribute allows one to split up
-the vectorized computation into chunks of ``chunk_size`` columns at a time, allowing one to compute the Jacobian
+the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``jac_chunk_size`` attribute allows one to split up
+the vectorized computation into chunks of ``jac_chunk_size`` columns at a time, allowing one to compute the Jacobian
 in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is
-linearly proportional to ``chunk_size``: the smaller the ``chunk_size``, the less memory the Jacobian calculation will
-require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t ~1/chunk_size``
-with some baseline time, so the larger the ``chunk_size``, the faster the calculation takes,
-at the cost of requiring more memory. A ``chunk_size`` of 1 corresponds to the least memory intensive,
-but slowest method of calculating the Jacobian. If ``chunk_size=None``, it will default to the largest
-possible `chunk_size` i.e. ``obj.dim_x``.
+linearly proportional to ``jac_chunk_size``: the smaller the ``jac_chunk_size``, the less memory the Jacobian calculation will
+require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t ~1/jac_chunk_size``
+with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
+at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,
+but slowest method of calculating the Jacobian. If ``jac_chunk_size=None``, it will default to the largest
+possible `jac_chunk_size` i.e. ``obj.dim_x``.
 
 If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
 be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
-the ``chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
-For example, if ``obj1 = QuasisymmetryTripleProduct(eq, chunk_size=100)``, ``obj2 = MeanCurvature(eq, chunk_size=2000)``
+the ``jac_chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
+For example, if ``obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)``, ``obj2 = MeanCurvature(eq, jac_chunk_size=2000)``
 and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with a
-``chunk_size=100`` for the quasisymmetry part and a ``chunk_size=2000`` for the curvature part, then the full Jacobian
+``jac_chunk_size=100`` for the quasisymmetry part and a ``jac_chunk_size=2000`` for the curvature part, then the full Jacobian
 will be formed as a block diagonal matrix with the individual Jacobians of these two objectives.
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 4d3308d2a1..0ac1ed8f68 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -301,7 +301,7 @@ def test_ATF_results(tmpdir_factory):
         spectral_indexing=eq0.spectral_indexing,
     )
     eqf = EquilibriaFamily.solve_continuation_automatic(
-        eq, verbose=2, checkpoint_path=output_dir.join("ATF.h5"), chunk_size=500
+        eq, verbose=2, checkpoint_path=output_dir.join("ATF.h5"), jac_chunk_size=500
     )
     eqf = load(output_dir.join("ATF.h5"))
     rho_err, theta_err = area_difference_desc(eq0, eqf[-1])
@@ -1040,7 +1040,8 @@ def test_freeb_vacuum():
         FixPsi(eq=eq),
     )
     objective = ObjectiveFunction(
-        VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True), chunk_size=1000
+        VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True),
+        jac_chunk_size=1000,
     )
     eq, _ = eq.optimize(
         objective,
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 9140ed4f9c..1d9d548d15 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1284,7 +1284,7 @@ def test_derivative_modes():
     surf = FourierRZToroidalSurface()
     obj1 = ObjectiveFunction(
         [
-            PlasmaVesselDistance(eq, surf, chunk_size=1),
+            PlasmaVesselDistance(eq, surf, jac_chunk_size=1),
             MagneticWell(eq),
         ],
         deriv_mode="batched",
@@ -1292,11 +1292,11 @@ def test_derivative_modes():
     )
     obj2 = ObjectiveFunction(
         [
-            PlasmaVesselDistance(eq, surf, chunk_size=2),
+            PlasmaVesselDistance(eq, surf, jac_chunk_size=2),
             MagneticWell(eq),
         ],
         deriv_mode="blocked",
-        chunk_size=10,
+        jac_chunk_size=10,
         use_jit=False,
     )
     obj3 = ObjectiveFunction(
@@ -1307,12 +1307,12 @@ def test_derivative_modes():
         deriv_mode="looped",
         use_jit=False,
     )
-    with pytest.warns(UserWarning, match="chunk_size"):
+    with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj1.build()
-    with pytest.warns(UserWarning, match="chunk_size"):
+    with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj2.build()
     # check that default size works for blocked
-    assert obj2.objectives[1].chunk_size == np.ceil(
+    assert obj2.objectives[1].jac_chunk_size == np.ceil(
         sum(t.dim_x for t in obj2.objectives[1].things) / 4
     )
     obj3.build()

From e4e27a20b20b583aa65e1456af2e57852c57633c Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 11:04:04 +0900
Subject: [PATCH 20/46] remove unused functions

---
 desc/utils.py                | 52 ------------------------------------
 tests/test_objective_funs.py |  2 +-
 2 files changed, 1 insertion(+), 53 deletions(-)

diff --git a/desc/utils.py b/desc/utils.py
index c3d0d014cb..2c79985098 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -898,7 +898,6 @@ def f_(carry, x):
 
 
 scan_append = functools.partial(scan_append_reduce, append_cond=True)
-scan_reduce = functools.partial(scan_append_reduce, append_cond=False)
 
 
 # TODO in_axes a la vmap?
@@ -1015,49 +1014,6 @@ def _parse_in_axes(in_axes):
     return in_axes, argnums
 
 
-def apply_chunked(
-    f: Callable,
-    in_axes=0,
-    *,
-    jac_chunk_size: Optional[int],
-) -> Callable:
-    """Compute f in smaller chunks over axis 0.
-
-    Takes an implicitly vmapped function over the axis 0 and uses scan to
-    do the computations in smaller chunks over the 0-th axis of all input arguments.
-
-    For this to work, the function `f` should be `vectorized` along the `in_axes`
-    of the arguments. This means that the function `f` should respect the following
-    condition:
-
-    .. code-block:: python
-
-        assert f(x) == jnp.concatenate([f(x_i) for x_i in x], axis=0)
-
-    which is automatically satisfied if `f` is obtained by vmapping a function,
-    such as:
-
-    .. code-block:: python
-
-        f = jax.vmap(f_orig)
-
-
-    Parameters
-    ----------
-        f: A function that satisfies the condition above
-        in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
-           chunking is disabled
-
-    """
-    _, argnums = _parse_in_axes(in_axes)
-    return _chunk_vmapped_function(
-        f,
-        jac_chunk_size,
-        argnums,
-    )
-
-
 def vmap_chunked(
     f: Callable,
     in_axes=0,
@@ -1066,14 +1022,6 @@ def vmap_chunked(
 ) -> Callable:
     """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
 
-    This function is essentially equivalent to:
-
-    .. code-block:: python
-
-        nk.jax.apply_chunked(jax.vmap(f, in_axes), in_axes, jac_chunk_size)
-
-    Some limitations to `in_axes` apply.
-
     Parameters
     ----------
         f: The function to be vectorised.
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 1d9d548d15..92f1bc95e5 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1312,7 +1312,7 @@ def test_derivative_modes():
     with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj2.build()
     # check that default size works for blocked
-    assert obj2.objectives[1].jac_chunk_size == np.ceil(
+    assert obj2.objectives[1]._jac_chunk_size == np.ceil(
         sum(t.dim_x for t in obj2.objectives[1].things) / 4
     )
     obj3.build()

From 51e3e4010767b8ac2d4ae693ef6ca18f053d5b3a Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 11:11:00 +0900
Subject: [PATCH 21/46] simplify chunking fxn

---
 desc/utils.py | 125 +-------------------------------------------------
 1 file changed, 2 insertions(+), 123 deletions(-)

diff --git a/desc/utils.py b/desc/utils.py
index 2c79985098..df6ad3488d 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -802,131 +802,10 @@ def chunk(x, jac_chunk_size=None):
 # Copyright 2021 The NetKet Authors - All rights reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 
-_tree_add = functools.partial(jax.tree_util.tree_map, jax.lax.add)
-_tree_zeros_like = functools.partial(
-    jax.tree_util.tree_map, lambda x: jnp.zeros(x.shape, dtype=x.dtype)
-)
-
-
-# TODO put it somewhere
-def _multimap(f, *args):
-    try:
-        return tuple(map(lambda a: f(*a), zip(*args)))
-    except TypeError:
-        return f(*args)
-
-
-def scan_append_reduce(f, x, append_cond, op=_tree_add, zero_fun=_tree_zeros_like):
-    """Evaluate f element by element in x while appending and/or reducing the results.
-
-    Parameters
-    ----------
-        f: a function that takes elements of the leading dimension of x
-        x: a pytree where each leaf array has the same leading dimension
-        append_cond: a bool (if f returns just one result) or a tuple of
-                     bools (if f returns multiple values)
-                     which indicates whether the individual result should
-                     be appended or reduced
-        op: a function to (pairwise) reduce the specified results. Defaults to a sum.
-        zero_fun: a function which prepares the zero element of op for a given input
-                  shape/dtype tree. Defaults to zeros.
-
-    Returns
-    -------
-        The (tuple of) results corresponding to the output of f
-        where each result is given by:
-        if append_cond is True:
-            a (pytree of) array(s) with leading dimension same as x,
-            containing the evaluation of f at each element in x
-        else (append_cond is False):
-            a (pytree of) array(s) with the same shape as the corresponding
-            output of f, containing the reduction over op of f evaluated at each x
-
-
-    Example:
-
-        import jax.numpy as jnp
-        from netket.jax import scan_append_reduce
-
-        def f(x):
-             y = jnp.sin(x)
-             return y, y, y**2
-
-        N = 100
-        x = jnp.linspace(0.,jnp.pi,N)
-
-        y, s, s2 = scan_append_reduce(f, x, (True, False, False))
-        mean = s/N
-        var = s2/N - mean**2
-    """
-    # TODO: different op for each result
-
-    x0 = jax.tree_util.tree_map(lambda x: x[0], x)
-
-    # special code path if there is only one element
-    # to avoid having to rely on xla/llvm to optimize the overhead away
-    if jax.tree_util.tree_leaves(x)[0].shape[0] == 1:
-        return _multimap(
-            lambda c, x: jnp.expand_dims(x, 0) if c else x, append_cond, f(x0)
-        )
-
-    # the original idea was to use pytrees,
-    # however for now just operate on the return value tuple
-    _get_append_part = functools.partial(
-        _multimap, lambda c, x: x if c else None, append_cond
-    )
-    _get_op_part = functools.partial(
-        _multimap, lambda c, x: x if not c else None, append_cond
-    )
-    _tree_select = functools.partial(
-        _multimap, lambda c, t1, t2: t1 if c else t2, append_cond
-    )
-
-    carry_init = True, _get_op_part(zero_fun(jax.eval_shape(f, x0)))
-
-    def f_(carry, x):
-        is_first, y_carry = carry
-        y = f(x)
-        y_op = _get_op_part(y)
-        y_append = _get_append_part(y)
-        y_reduce = op(y_carry, y_op)
-        return (False, y_reduce), y_append
-
-    (_, res_op), res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
-    # reconstruct the result from the reduced and appended parts in the two trees
-    return _tree_select(res_append, res_op)
-
-
-scan_append = functools.partial(scan_append_reduce, append_cond=True)
-
 
 # TODO in_axes a la vmap?
 def _scanmap(fun, scan_fun, argnums=0):
-    """A helper function to wrap f with a scan_fun.
-
-    Example
-    -------
-        import jax.numpy as jnp
-        from functools import partial
-
-        from desc.utils import _scanmap, scan_append_reduce
-
-        scan_fun = partial(scan_append_reduce, append_cond=(True, False, False))
-
-        @partial(_scanmap, scan_fun=scan_fun, argnums=1)
-        def f(c, x):
-             y = jnp.sin(x) + c
-             return y, y, y**2
-
-        N = 100
-        x = jnp.linspace(0.,jnp.pi,N)
-        c = 1.
-
-
-        y, s, s2 = f(c, x)
-        mean = s/N
-        var = s2/N - mean**2
-    """
+    """A helper function to wrap f with a scan_fun."""
 
     def f_(*args, **kwargs):
         f = lu.wrap_init(fun, kwargs)
@@ -974,7 +853,7 @@ def _get_rest(x):
         args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)]
 
         y_chunks = _unchunk(
-            _scanmap(vmapped_fun, scan_append, argnums)(*args_chunks, **kwargs)
+            _scanmap(vmapped_fun, jax.lax.scan, argnums)(*args_chunks, **kwargs)
         )
 
         if n_rest == 0:

From da359d501fd1b95a3430b4fcac6eee0400153a29 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 11:31:28 +0900
Subject: [PATCH 22/46] place batched_vectorize utils in own file

---
 desc/objectives/objective_funs.py |   2 +-
 desc/utils.py                     | 361 +-----------------------------
 2 files changed, 5 insertions(+), 358 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 87b390db92..b089172d31 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -12,7 +12,6 @@
 from desc.utils import (
     PRINT_WIDTH,
     Timer,
-    batched_vectorize,
     errorif,
     flatten_list,
     is_broadcastable,
@@ -20,6 +19,7 @@
     unique_list,
     warnif,
 )
+from desc.utils_batched_vectorize import batched_vectorize
 
 
 class ObjectiveFunction(IOAble):
diff --git a/desc/utils.py b/desc/utils.py
index df6ad3488d..d1483aa039 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -2,27 +2,13 @@
 
 import operator
 import warnings
-from functools import partial
 from itertools import combinations_with_replacement, permutations
-from typing import Callable, Optional
 
 import numpy as np
 from scipy.special import factorial
 from termcolor import colored
 
-from desc.backend import flatnonzero, fori_loop, functools, jax, jit, jnp, take
-
-if jax.__version_info__ >= (0, 4, 16):
-    from jax.extend import linear_util as lu
-else:
-    from jax import linear_util as lu
-
-from jax._src.numpy.vectorize import (
-    _apply_excluded,
-    _check_output_dims,
-    _parse_gufunc_signature,
-    _parse_input_dimensions,
-)
+from desc.backend import flatnonzero, fori_loop, functools, jit, jnp, take
 
 
 class Timer:
@@ -705,348 +691,9 @@ def broadcast_tree(tree_in, tree_out, dtype=int):
         raise ValueError("trees must be nested lists of dicts")
 
 
-# The following section of this code is derived from the NetKet project
-# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
-# netket/jax/_chunk_utils.py
-#
-# The original copyright notice is as follows
-# Copyright 2021 The NetKet Authors - All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-
-
-def _treeify(f):
-    def _f(x, *args, **kwargs):
-        return jax.tree_util.tree_map(lambda y: f(y, *args, **kwargs), x)
-
-    return _f
-
-
-@_treeify
-def _unchunk(x):
-    return x.reshape((-1,) + x.shape[2:])
-
-
-@_treeify
-def _chunk(x, jac_chunk_size=None):
-    # jac_chunk_size=None -> add just a dummy chunk dimension,
-    #  same as np.expand_dims(x, 0)
-    if x.ndim == 0:
-        raise ValueError("x cannot be chunked as it has 0 dimensions.")
-    n = x.shape[0]
-    if jac_chunk_size is None:
-        jac_chunk_size = n
-
-    n_chunks, residual = divmod(n, jac_chunk_size)
-    if residual != 0:
-        raise ValueError(
-            "The first dimension of x must be divisible by jac_chunk_size."
-            + f"\n        Got x.shape={x.shape} but jac_chunk_size={jac_chunk_size}."
-        )
-    return x.reshape((n_chunks, jac_chunk_size) + x.shape[1:])
-
-
-def _jac_chunk_size(x):
-    b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
-    if len(b) != 1:
-        raise ValueError(
-            "The arrays in x have inconsistent jac_chunk_size or number of chunks"
-        )
-    return b.pop()[1]
-
-
-def unchunk(x_chunked):
-    """Merge the first two axes of an array (or a pytree of arrays).
-
-    Parameters
-    ----------
-    x_chunked: an array (or pytree of arrays) of at least 2 dimensions
-
-    Returns
-    -------
-    (x, chunk_fn) : tuple
-        where x is x_chunked reshaped to (-1,)+x.shape[2:]
-        and chunk_fn is a function which restores x given x_chunked
-
-    """
-    return _unchunk(x_chunked), functools.partial(
-        _chunk, jac_chunk_size=_jac_chunk_size(x_chunked)
-    )
-
-
-def chunk(x, jac_chunk_size=None):
-    """Split an array (or a pytree of arrays) into chunks along the first axis.
-
-    Parameters
-    ----------
-        x: an array (or pytree of arrays)
-        jac_chunk_size: an integer or None (default)
-            The first axis in x must be a multiple of jac_chunk_size
-
-    Returns
-    -------
-    (x_chunked, unchunk_fn): tuple
-        - x_chunked is x reshaped to (-1, jac_chunk_size)+x.shape[1:]
-          if jac_chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
-        - unchunk_fn is a function which restores x given x_chunked
-    """
-    return _chunk(x, jac_chunk_size), _unchunk
-
-
-####
-
-# The following section of this code is derived from the NetKet project
-# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
-# netket/jax/_scanmap.py
-#
-# The original copyright notice is as follows
-# Copyright 2021 The NetKet Authors - All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-
-
-# TODO in_axes a la vmap?
-def _scanmap(fun, scan_fun, argnums=0):
-    """A helper function to wrap f with a scan_fun."""
-
-    def f_(*args, **kwargs):
-        f = lu.wrap_init(fun, kwargs)
-        f_partial, dyn_args = jax.api_util.argnums_partial(
-            f, argnums, args, require_static_args_hashable=False
-        )
-        return scan_fun(lambda x: f_partial.call_wrapped(*x), dyn_args)
-
-    return f_
-
-
-# The following section of this code is derived from the NetKet project
-# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
-# netket/jax/_vmap_chunked.py
-#
-# The original copyright notice is as follows
-# Copyright 2021 The NetKet Authors - All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-
-
-def _eval_fun_in_chunks(vmapped_fun, jac_chunk_size, argnums, *args, **kwargs):
-    n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0]
-    n_chunks, n_rest = divmod(n_elements, jac_chunk_size)
-
-    if n_chunks == 0 or jac_chunk_size >= n_elements:
-        y = vmapped_fun(*args, **kwargs)
-    else:
-        # split inputs
-        def _get_chunks(x):
-            x_chunks = jax.tree_util.tree_map(
-                lambda x_: x_[: n_elements - n_rest, ...], x
-            )
-            x_chunks = _chunk(x_chunks, jac_chunk_size)
-            return x_chunks
-
-        def _get_rest(x):
-            x_rest = jax.tree_util.tree_map(
-                lambda x_: x_[n_elements - n_rest :, ...], x
-            )
-            return x_rest
-
-        args_chunks = [
-            _get_chunks(a) if i in argnums else a for i, a in enumerate(args)
-        ]
-        args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)]
-
-        y_chunks = _unchunk(
-            _scanmap(vmapped_fun, jax.lax.scan, argnums)(*args_chunks, **kwargs)
-        )
-
-        if n_rest == 0:
-            y = y_chunks
-        else:
-            y_rest = vmapped_fun(*args_rest, **kwargs)
-            y = jax.tree_util.tree_map(
-                lambda y1, y2: jnp.concatenate((y1, y2)), y_chunks, y_rest
-            )
-    return y
-
-
-def _chunk_vmapped_function(
-    vmapped_fun: Callable,
-    jac_chunk_size: Optional[int],
-    argnums=0,
-) -> Callable:
-    """Takes a vmapped function and computes it in chunks."""
-    if jac_chunk_size is None:
-        return vmapped_fun
-
-    if isinstance(argnums, int):
-        argnums = (argnums,)
-    return functools.partial(_eval_fun_in_chunks, vmapped_fun, jac_chunk_size, argnums)
-
-
-def _parse_in_axes(in_axes):
-    if isinstance(in_axes, int):
-        in_axes = (in_axes,)
-
-    if not set(in_axes).issubset((0, None)):
-        raise NotImplementedError("Only in_axes 0/None are currently supported")
-
-    argnums = tuple(
-        map(lambda ix: ix[0], filter(lambda ix: ix[1] is not None, enumerate(in_axes)))
-    )
-    return in_axes, argnums
-
-
-def vmap_chunked(
-    f: Callable,
-    in_axes=0,
-    *,
-    jac_chunk_size: Optional[int],
-) -> Callable:
-    """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
-
-    Parameters
-    ----------
-        f: The function to be vectorised.
-        in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
-            chunking is disabled
-
-
-    Returns
-    -------
-        f: A vectorised and chunked function
-    """
-    in_axes, argnums = _parse_in_axes(in_axes)
-    vmapped_fun = jax.vmap(f, in_axes=in_axes)
-    return _chunk_vmapped_function(vmapped_fun, jac_chunk_size, argnums)
-
-
-def batched_vectorize(
-    pyfunc, *, excluded=frozenset(), signature=None, jac_chunk_size=None
-):
-    """Define a vectorized function with broadcasting and batching.
-
-    below is taken from JAX
-    FIXME: change restof docstring
-    :func:`vectorize` is a convenience wrapper for defining vectorized
-    functions with broadcasting, in the style of NumPy's
-    `generalized universal functions
-    <https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html>`_.
-    It allows for defining functions that are automatically repeated across
-    any leading dimensions, without the implementation of the function needing to
-    be concerned about how to handle higher dimensional inputs.
-
-    :func:`jax.numpy.vectorize` has the same interface as
-    :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching
-    transformation (:func:`vmap`) rather than a Python loop. This should be
-    considerably more efficient, but the implementation must be written in terms
-    of functions that act on JAX arrays.
-
-    Parameters
-    ----------
-        pyfunc: function to vectorize.
-        excluded: optional set of integers representing positional arguments for
-        which the function will not be vectorized. These will be passed directly
-        to ``pyfunc`` unmodified.
-        signature: optional generalized universal function signature, e.g.,
-        ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
-        provided, ``pyfunc`` will be called with (and expected to return) arrays
-        with shapes given by the size of corresponding core dimensions. By
-        default, pyfunc is assumed to take scalars arrays as input and output.
-        jac_chunk_size: the size of the batches to pass to vmap. if 1, will only
-
-    Returns
-    -------
-        Vectorized version of the given function.
-
-    """
-    if any(not isinstance(exclude, (str, int)) for exclude in excluded):
-        raise TypeError(
-            "jax.numpy.vectorize can only exclude integer or string arguments, "
-            "but excluded={!r}".format(excluded)
-        )
-    if any(isinstance(e, int) and e < 0 for e in excluded):
-        raise ValueError(f"excluded={excluded!r} contains negative numbers")
-
-    @functools.wraps(pyfunc)
-    def wrapped(*args, **kwargs):
-        error_context = (
-            "on vectorized function with excluded={!r} and "
-            "signature={!r}".format(excluded, signature)
-        )
-        excluded_func, args, kwargs = _apply_excluded(pyfunc, excluded, args, kwargs)
-
-        if signature is not None:
-            input_core_dims, output_core_dims = _parse_gufunc_signature(signature)
-        else:
-            input_core_dims = [()] * len(args)
-            output_core_dims = None
-
-        none_args = {i for i, arg in enumerate(args) if arg is None}
-        if any(none_args):
-            if any(input_core_dims[i] != () for i in none_args):
-                raise ValueError(
-                    f"Cannot pass None at locations {none_args} with {signature=}"
-                )
-            excluded_func, args, _ = _apply_excluded(excluded_func, none_args, args, {})
-            input_core_dims = [
-                dim for i, dim in enumerate(input_core_dims) if i not in none_args
-            ]
-
-        args = tuple(map(jnp.asarray, args))
-
-        broadcast_shape, dim_sizes = _parse_input_dimensions(
-            args, input_core_dims, error_context
-        )
-
-        checked_func = _check_output_dims(
-            excluded_func, dim_sizes, output_core_dims, error_context
-        )
-
-        # Rather than broadcasting all arguments to full broadcast shapes, prefer
-        # expanding dimensions using vmap. By pushing broadcasting
-        # into vmap, we can make use of more efficient batching rules for
-        # primitives where only some arguments are batched (e.g., for
-        # lax_linalg.triangular_solve), and avoid instantiating large broadcasted
-        # arrays.
-
-        squeezed_args = []
-        rev_filled_shapes = []
-
-        for arg, core_dims in zip(args, input_core_dims):
-            noncore_shape = arg.shape[: arg.ndim - len(core_dims)]
-
-            pad_ndim = len(broadcast_shape) - len(noncore_shape)
-            filled_shape = pad_ndim * (1,) + noncore_shape
-            rev_filled_shapes.append(filled_shape[::-1])
-
-            squeeze_indices = tuple(
-                i for i, size in enumerate(noncore_shape) if size == 1
-            )
-            squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices)
-            squeezed_args.append(squeezed_arg)
-
-        vectorized_func = checked_func
-        dims_to_expand = []
-        for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)):
-            in_axes = tuple(None if size == 1 else 0 for size in axis_sizes)
-            if all(axis is None for axis in in_axes):
-                dims_to_expand.append(len(broadcast_shape) - 1 - negdim)
-            else:
-                # change the vmap here to chunked_vmap
-                vectorized_func = vmap_chunked(
-                    vectorized_func, in_axes, jac_chunk_size=jac_chunk_size
-                )
-        result = vectorized_func(*squeezed_args)
-
-        if not dims_to_expand:
-            return result
-        elif isinstance(result, tuple):
-            return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result)
-        else:
-            return jnp.expand_dims(result, axis=dims_to_expand)
-
-    return wrapped
-
-
-@partial(jnp.vectorize, signature="(m),(m)->(n)", excluded={"size", "fill_value"})
+@functools.partial(
+    jnp.vectorize, signature="(m),(m)->(n)", excluded={"size", "fill_value"}
+)
 def take_mask(a, mask, /, *, size=None, fill_value=None):
     """JIT compilable method to return ``a[mask][:size]`` padded by ``fill_value``.
 

From fbebfb8a05038d64da9c130816b4ff45b07c0e22 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 11:33:39 +0900
Subject: [PATCH 23/46] add file

---
 desc/utils_batched_vectorize.py | 357 ++++++++++++++++++++++++++++++++
 1 file changed, 357 insertions(+)
 create mode 100644 desc/utils_batched_vectorize.py

diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
new file mode 100644
index 0000000000..9d0f18050d
--- /dev/null
+++ b/desc/utils_batched_vectorize.py
@@ -0,0 +1,357 @@
+"""Utility functions for the ``batched_vectorize`` function."""
+
+from typing import Callable, Optional
+
+from desc.backend import functools, jax, jnp
+
+if jax.__version_info__ >= (0, 4, 16):
+    from jax.extend import linear_util as lu
+else:
+    from jax import linear_util as lu
+
+from jax._src.numpy.vectorize import (
+    _apply_excluded,
+    _check_output_dims,
+    _parse_gufunc_signature,
+    _parse_input_dimensions,
+)
+
+# The following section of this code is derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_chunk_utils.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+
+def _treeify(f):
+    def _f(x, *args, **kwargs):
+        return jax.tree_util.tree_map(lambda y: f(y, *args, **kwargs), x)
+
+    return _f
+
+
+@_treeify
+def _unchunk(x):
+    return x.reshape((-1,) + x.shape[2:])
+
+
+@_treeify
+def _chunk(x, jac_chunk_size=None):
+    # jac_chunk_size=None -> add just a dummy chunk dimension,
+    #  same as np.expand_dims(x, 0)
+    if x.ndim == 0:
+        raise ValueError("x cannot be chunked as it has 0 dimensions.")
+    n = x.shape[0]
+    if jac_chunk_size is None:
+        jac_chunk_size = n
+
+    n_chunks, residual = divmod(n, jac_chunk_size)
+    if residual != 0:
+        raise ValueError(
+            "The first dimension of x must be divisible by jac_chunk_size."
+            + f"\n        Got x.shape={x.shape} but jac_chunk_size={jac_chunk_size}."
+        )
+    return x.reshape((n_chunks, jac_chunk_size) + x.shape[1:])
+
+
+def _jac_chunk_size(x):
+    b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
+    if len(b) != 1:
+        raise ValueError(
+            "The arrays in x have inconsistent jac_chunk_size or number of chunks"
+        )
+    return b.pop()[1]
+
+
+def unchunk(x_chunked):
+    """Merge the first two axes of an array (or a pytree of arrays).
+
+    Parameters
+    ----------
+    x_chunked: an array (or pytree of arrays) of at least 2 dimensions
+
+    Returns
+    -------
+    (x, chunk_fn) : tuple
+        where x is x_chunked reshaped to (-1,)+x.shape[2:]
+        and chunk_fn is a function which restores x given x_chunked
+
+    """
+    return _unchunk(x_chunked), functools.partial(
+        _chunk, jac_chunk_size=_jac_chunk_size(x_chunked)
+    )
+
+
+def chunk(x, jac_chunk_size=None):
+    """Split an array (or a pytree of arrays) into chunks along the first axis.
+
+    Parameters
+    ----------
+        x: an array (or pytree of arrays)
+        jac_chunk_size: an integer or None (default)
+            The first axis in x must be a multiple of jac_chunk_size
+
+    Returns
+    -------
+    (x_chunked, unchunk_fn): tuple
+        - x_chunked is x reshaped to (-1, jac_chunk_size)+x.shape[1:]
+          if jac_chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
+        - unchunk_fn is a function which restores x given x_chunked
+    """
+    return _chunk(x, jac_chunk_size), _unchunk
+
+
+####
+
+# The following section of this code is derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_scanmap.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+
+# TODO in_axes a la vmap?
+def _scanmap(fun, scan_fun, argnums=0):
+    """A helper function to wrap f with a scan_fun."""
+
+    def f_(*args, **kwargs):
+        f = lu.wrap_init(fun, kwargs)
+        f_partial, dyn_args = jax.api_util.argnums_partial(
+            f, argnums, args, require_static_args_hashable=False
+        )
+        return scan_fun(lambda x: f_partial.call_wrapped(*x), dyn_args)
+
+    return f_
+
+
+# The following section of this code is derived from the NetKet project
+# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
+# netket/jax/_vmap_chunked.py
+#
+# The original copyright notice is as follows
+# Copyright 2021 The NetKet Authors - All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+
+def _eval_fun_in_chunks(vmapped_fun, jac_chunk_size, argnums, *args, **kwargs):
+    n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0]
+    n_chunks, n_rest = divmod(n_elements, jac_chunk_size)
+
+    if n_chunks == 0 or jac_chunk_size >= n_elements:
+        y = vmapped_fun(*args, **kwargs)
+    else:
+        # split inputs
+        def _get_chunks(x):
+            x_chunks = jax.tree_util.tree_map(
+                lambda x_: x_[: n_elements - n_rest, ...], x
+            )
+            x_chunks = _chunk(x_chunks, jac_chunk_size)
+            return x_chunks
+
+        def _get_rest(x):
+            x_rest = jax.tree_util.tree_map(
+                lambda x_: x_[n_elements - n_rest :, ...], x
+            )
+            return x_rest
+
+        args_chunks = [
+            _get_chunks(a) if i in argnums else a for i, a in enumerate(args)
+        ]
+        args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)]
+
+        y_chunks = _unchunk(
+            _scanmap(vmapped_fun, jax.lax.scan, argnums)(*args_chunks, **kwargs)
+        )
+
+        if n_rest == 0:
+            y = y_chunks
+        else:
+            y_rest = vmapped_fun(*args_rest, **kwargs)
+            y = jax.tree_util.tree_map(
+                lambda y1, y2: jnp.concatenate((y1, y2)), y_chunks, y_rest
+            )
+    return y
+
+
+def _chunk_vmapped_function(
+    vmapped_fun: Callable,
+    jac_chunk_size: Optional[int],
+    argnums=0,
+) -> Callable:
+    """Takes a vmapped function and computes it in chunks."""
+    if jac_chunk_size is None:
+        return vmapped_fun
+
+    if isinstance(argnums, int):
+        argnums = (argnums,)
+    return functools.partial(_eval_fun_in_chunks, vmapped_fun, jac_chunk_size, argnums)
+
+
+def _parse_in_axes(in_axes):
+    if isinstance(in_axes, int):
+        in_axes = (in_axes,)
+
+    if not set(in_axes).issubset((0, None)):
+        raise NotImplementedError("Only in_axes 0/None are currently supported")
+
+    argnums = tuple(
+        map(lambda ix: ix[0], filter(lambda ix: ix[1] is not None, enumerate(in_axes)))
+    )
+    return in_axes, argnums
+
+
+def vmap_chunked(
+    f: Callable,
+    in_axes=0,
+    *,
+    jac_chunk_size: Optional[int],
+) -> Callable:
+    """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
+
+    Parameters
+    ----------
+        f: The function to be vectorised.
+        in_axes: The axes that should be scanned along. Only supports `0` or `None`
+        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
+            chunking is disabled
+
+
+    Returns
+    -------
+        f: A vectorised and chunked function
+    """
+    in_axes, argnums = _parse_in_axes(in_axes)
+    vmapped_fun = jax.vmap(f, in_axes=in_axes)
+    return _chunk_vmapped_function(vmapped_fun, jac_chunk_size, argnums)
+
+
+def batched_vectorize(
+    pyfunc, *, excluded=frozenset(), signature=None, jac_chunk_size=None
+):
+    """Define a vectorized function with broadcasting and batching.
+
+    below is taken from JAX
+    FIXME: change rest of docstring
+    :func:`vectorize` is a convenience wrapper for defining vectorized
+    functions with broadcasting, in the style of NumPy's
+    `generalized universal functions
+    <https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html>`_.
+    It allows for defining functions that are automatically repeated across
+    any leading dimensions, without the implementation of the function needing to
+    be concerned about how to handle higher dimensional inputs.
+
+    :func:`jax.numpy.vectorize` has the same interface as
+    :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching
+    transformation (:func:`vmap`) rather than a Python loop. This should be
+    considerably more efficient, but the implementation must be written in terms
+    of functions that act on JAX arrays.
+
+    Parameters
+    ----------
+        pyfunc: function to vectorize.
+        excluded: optional set of integers representing positional arguments for
+        which the function will not be vectorized. These will be passed directly
+        to ``pyfunc`` unmodified.
+        signature: optional generalized universal function signature, e.g.,
+        ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
+        provided, ``pyfunc`` will be called with (and expected to return) arrays
+        with shapes given by the size of corresponding core dimensions. By
+        default, pyfunc is assumed to take scalars arrays as input and output.
+        jac_chunk_size: the size of the batches to pass to vmap. If None, no chunking.
+
+    Returns
+    -------
+        Vectorized version of the given function.
+
+    """
+    if any(not isinstance(exclude, (str, int)) for exclude in excluded):
+        raise TypeError(
+            "jax.numpy.vectorize can only exclude integer or string arguments, "
+            "but excluded={!r}".format(excluded)
+        )
+    if any(isinstance(e, int) and e < 0 for e in excluded):
+        raise ValueError(f"excluded={excluded!r} contains negative numbers")
+
+    @functools.wraps(pyfunc)
+    def wrapped(*args, **kwargs):
+        error_context = (
+            "on vectorized function with excluded={!r} and "
+            "signature={!r}".format(excluded, signature)
+        )
+        excluded_func, args, kwargs = _apply_excluded(pyfunc, excluded, args, kwargs)
+
+        if signature is not None:
+            input_core_dims, output_core_dims = _parse_gufunc_signature(signature)
+        else:
+            input_core_dims = [()] * len(args)
+            output_core_dims = None
+
+        none_args = {i for i, arg in enumerate(args) if arg is None}
+        if any(none_args):
+            if any(input_core_dims[i] != () for i in none_args):
+                raise ValueError(
+                    f"Cannot pass None at locations {none_args} with {signature=}"
+                )
+            excluded_func, args, _ = _apply_excluded(excluded_func, none_args, args, {})
+            input_core_dims = [
+                dim for i, dim in enumerate(input_core_dims) if i not in none_args
+            ]
+
+        args = tuple(map(jnp.asarray, args))
+
+        broadcast_shape, dim_sizes = _parse_input_dimensions(
+            args, input_core_dims, error_context
+        )
+
+        checked_func = _check_output_dims(
+            excluded_func, dim_sizes, output_core_dims, error_context
+        )
+
+        # Rather than broadcasting all arguments to full broadcast shapes, prefer
+        # expanding dimensions using vmap. By pushing broadcasting
+        # into vmap, we can make use of more efficient batching rules for
+        # primitives where only some arguments are batched (e.g., for
+        # lax_linalg.triangular_solve), and avoid instantiating large broadcasted
+        # arrays.
+
+        squeezed_args = []
+        rev_filled_shapes = []
+
+        for arg, core_dims in zip(args, input_core_dims):
+            noncore_shape = arg.shape[: arg.ndim - len(core_dims)]
+
+            pad_ndim = len(broadcast_shape) - len(noncore_shape)
+            filled_shape = pad_ndim * (1,) + noncore_shape
+            rev_filled_shapes.append(filled_shape[::-1])
+
+            squeeze_indices = tuple(
+                i for i, size in enumerate(noncore_shape) if size == 1
+            )
+            squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices)
+            squeezed_args.append(squeezed_arg)
+
+        vectorized_func = checked_func
+        dims_to_expand = []
+        for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)):
+            in_axes = tuple(None if size == 1 else 0 for size in axis_sizes)
+            if all(axis is None for axis in in_axes):
+                dims_to_expand.append(len(broadcast_shape) - 1 - negdim)
+            else:
+                # unlike jnp.vectorize, use vmap_chunked instead of jax.vmap here
+                vectorized_func = vmap_chunked(
+                    vectorized_func, in_axes, jac_chunk_size=jac_chunk_size
+                )
+        result = vectorized_func(*squeezed_args)
+
+        if not dims_to_expand:
+            return result
+        elif isinstance(result, tuple):
+            return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result)
+        else:
+            return jnp.expand_dims(result, axis=dims_to_expand)
+
+    return wrapped

From fc8c98ccdf83660843223afb917adf15dde78dcf Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 12:08:54 +0900
Subject: [PATCH 24/46] remove jac_looped, deprecate 'looped' deriv_mode

---
 CHANGELOG.md                          |  3 +++
 desc/derivatives.py                   | 21 ++-----------------
 desc/objectives/objective_funs.py     | 30 ++++++++++++++-------------
 desc/optimize/_constraint_wrappers.py |  2 +-
 tests/test_derivatives.py             | 19 -----------------
 tests/test_objective_funs.py          | 17 ++++++++-------
 tests/test_optimizer.py               | 26 ++++++++++++-----------
 7 files changed, 45 insertions(+), 73 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ccf156230a..2c165fa562 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,9 @@ Bug Fixes
 - Fixes bugs that occur when saving asymmetric equilibria as wout files
 - Fixes bug that occurs when using ``VMECIO.plot_vmec_comparison`` to compare to an asymmetric wout file
 
+Deprecations
+
+- ``deriv_mode="looped"`` in ``ObjectiveFunction`` is deprecated and will be removed in a future version in favor of ``deriv_mode="batched"`` with ``jac_chunk_size=1``.
 
 
 v0.12.1
diff --git a/desc/derivatives.py b/desc/derivatives.py
index cf9f6d7073..684ea37d45 100644
--- a/desc/derivatives.py
+++ b/desc/derivatives.py
@@ -5,7 +5,7 @@
 import numpy as np
 from termcolor import colored
 
-from desc.backend import fori_loop, jnp, put, use_jax
+from desc.backend import jnp, put, use_jax
 
 if use_jax:
     import jax
@@ -315,23 +315,8 @@ def compute_jvp3(cls, fun, argnum1, argnum2, argnum3, v1, v2, v3, *args, **kwarg
     def _compute_jvp(self, v, *args, **kwargs):
         return self.compute_jvp(self._fun, self.argnum, v, *args, **kwargs)
 
-    def _jac_looped(self, *args, **kwargs):
-
-        n = args[self._argnum].size
-        shp = jax.eval_shape(self._fun, *args).shape
-        I = jnp.eye(n)
-        J = jnp.zeros((*shp, n)).T
-
-        def body(i, J):
-            tangents = I[i]
-            Ji = self._compute_jvp(tangents, *args, **kwargs)
-            J = put(J, i, Ji.T)
-            return J
-
-        return fori_loop(0, n, body, J).T
-
     def _set_mode(self, mode) -> None:
-        if mode not in ["fwd", "rev", "grad", "hess", "jvp", "looped"]:
+        if mode not in ["fwd", "rev", "grad", "hess", "jvp"]:
             raise ValueError(
                 colored("invalid mode option for automatic differentiation", "red")
             )
@@ -347,8 +332,6 @@ def _set_mode(self, mode) -> None:
             self._compute = jax.hessian(self._fun, self._argnum)
         elif self._mode == "jvp":
             self._compute = self._compute_jvp
-        elif self._mode == "looped":
-            self._compute = self._jac_looped
 
 
 class FiniteDiffDerivative(_Derivative):
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index b089172d31..4f639a539b 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -31,23 +31,21 @@ class ObjectiveFunction(IOAble):
         List of objectives to be minimized.
     use_jit : bool, optional
         Whether to just-in-time compile the objectives and derivatives.
-    deriv_mode : {"auto", "batched", "blocked", "looped"}
+    deriv_mode : {"auto", "batched", "blocked"}
         Method for computing Jacobian matrices. "batched" uses forward mode, applied to
         the entire objective at once, and is generally the fastest for vector valued
-        objectives, though most memory intensive. "blocked" builds the Jacobian for
+        objectives. Its memory intensity vs. speed may be traded off through the
+        ``jac_chunk_size`` keyword argument. "blocked" builds the Jacobian for
         each objective separately, using each objective's preferred AD mode (and
         each objective's `jac_chunk_size`). Generally the most efficient option when
-        mixing scalar and vector valued objectives. "looped" uses forward mode jacobian
-        vector products in a loop to build the Jacobian column by column. Generally the
-        slowest, but most memory efficient.
+        mixing scalar and vector valued objectives.
         "auto" defaults to "batched" if all sub-objectives are set to "fwd",
         otherwise "blocked".
     name : str
         Name of the objective function.
     jac_chunk_size : int, optional
         If `"batched"` deriv_mode is used, will calculate the Jacobian
-        ``jac_chunk_size`` columns at a time, instead of all at once. A
-        ``jac_chunk_size`` of 1 is equivalent to using `"looped"` deriv_mode.
+        ``jac_chunk_size`` columns at a time, instead of all at once.
         The memory usage of the Jacobian calculation is roughly
         ``memory usage = m0 + m1*jac_chunk_size``: the higher the chunk size,
         the less memory the Jacobian calculation will require (with some baseline
@@ -74,7 +72,17 @@ def __init__(
             isinstance(obj, _Objective) for obj in objectives
         ), "members of ObjectiveFunction should be instances of _Objective"
         assert use_jit in {True, False}
-        assert deriv_mode in {"auto", "batched", "looped", "blocked"}
+        warnif(
+            deriv_mode == "looped",
+            DeprecationWarning,
+            '``deriv_mode="looped"`` is deprecated in favor of'
+            ' ``deriv_mode="batched"`` with ``jac_chunk_size=1``.',
+        )
+        if deriv_mode == "looped":
+            # overwrite the user inputs if deprecated "looped" was given
+            deriv_mode = "batched"
+            jac_chunk_size = 1
+        assert deriv_mode in {"auto", "batched", "blocked"}
 
         self._jac_chunk_size = jac_chunk_size
         self._objectives = objectives
@@ -513,8 +521,6 @@ def jac_scaled(self, x, constants=None):
 
         if self._deriv_mode == "batched":
             J = Derivative(self.compute_scaled, mode="fwd")(x, constants)
-        if self._deriv_mode == "looped":
-            J = Derivative(self.compute_scaled, mode="looped")(x, constants)
         if self._deriv_mode == "blocked":
             J = self._jac_blocked("jac_scaled", x, constants)
 
@@ -528,8 +534,6 @@ def jac_scaled_error(self, x, constants=None):
 
         if self._deriv_mode == "batched":
             J = Derivative(self.compute_scaled_error, mode="fwd")(x, constants)
-        if self._deriv_mode == "looped":
-            J = Derivative(self.compute_scaled_error, mode="looped")(x, constants)
         if self._deriv_mode == "blocked":
             J = self._jac_blocked("jac_scaled_error", x, constants)
 
@@ -543,8 +547,6 @@ def jac_unscaled(self, x, constants=None):
 
         if self._deriv_mode == "batched":
             J = Derivative(self.compute_unscaled, mode="fwd")(x, constants)
-        if self._deriv_mode == "looped":
-            J = Derivative(self.compute_unscaled, mode="looped")(x, constants)
         if self._deriv_mode == "blocked":
             J = self._jac_blocked("jac_unscaled", x, constants)
 
diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py
index 2ee6645742..d187b28205 100644
--- a/desc/optimize/_constraint_wrappers.py
+++ b/desc/optimize/_constraint_wrappers.py
@@ -1059,7 +1059,7 @@ def _jvp(self, v, xf, xg, constants, op):
             ]
         )
         tangent = self._unfixed_idx_mat @ dfdc - dxdcv
-        if self._objective._deriv_mode in ["batched", "looped"]:
+        if self._objective._deriv_mode in ["batched"]:
             out = getattr(self._objective, "jvp_" + op)(tangent, xg, constants[0])
         else:  # deriv_mode == "blocked"
             vgs = jnp.split(tangent, np.cumsum(self._dimx_per_thing))
diff --git a/tests/test_derivatives.py b/tests/test_derivatives.py
index 8cb4ed9bf5..29c2f909dc 100644
--- a/tests/test_derivatives.py
+++ b/tests/test_derivatives.py
@@ -121,25 +121,6 @@ def fun(x):
         jac = AutoDiffDerivative(fun, num_blocks=3, shape=A.shape)
         np.testing.assert_allclose(jac(x), A)
 
-    @pytest.mark.unit
-    def test_jac_looped(self):
-        """Test computing the jacobian by explicit looping jvp."""
-
-        def test_fun(x, y, a):
-            return jnp.cos(x) + x * y + a
-
-        x = np.array([1, 5, 0.01, 200])
-        y = np.array([60, 1, 100, 0.02])
-        a = -2.0
-
-        jac1 = AutoDiffDerivative(test_fun, argnum=0, mode="fwd")
-        J1 = jac1.compute(x, y, a)
-
-        jac2 = AutoDiffDerivative(test_fun, argnum=0, mode="looped")
-        J2 = jac2.compute(x, y, a)
-
-        np.testing.assert_allclose(J1, J2, atol=1e-8)
-
 
 class TestJVP:
     """Test calculation of jacobian vector products."""
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 92f1bc95e5..53ac2b63b9 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1299,14 +1299,15 @@ def test_derivative_modes():
         jac_chunk_size=10,
         use_jit=False,
     )
-    obj3 = ObjectiveFunction(
-        [
-            PlasmaVesselDistance(eq, surf),
-            MagneticWell(eq),
-        ],
-        deriv_mode="looped",
-        use_jit=False,
-    )
+    with pytest.warns(DeprecationWarning, match="looped"):
+        obj3 = ObjectiveFunction(
+            [
+                PlasmaVesselDistance(eq, surf),
+                MagneticWell(eq),
+            ],
+            deriv_mode="looped",
+            use_jit=False,
+        )
     with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj1.build()
     with pytest.warns(UserWarning, match="jac_chunk_size"):
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 24e4cd028a..4f0cce0e0b 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1136,15 +1136,16 @@ def test_proximal_jacobian():
         deriv_mode="batched",
         use_jit=False,
     )
-    obj2 = ObjectiveFunction(
-        (
-            QuasisymmetryTripleProduct(eq2, deriv_mode="fwd"),
-            AspectRatio(eq2, deriv_mode="fwd"),
-            Volume(eq2, deriv_mode="fwd"),
-        ),
-        deriv_mode="looped",
-        use_jit=False,
-    )
+    with pytest.warns(DeprecationWarning, match="looped"):
+        obj2 = ObjectiveFunction(
+            (
+                QuasisymmetryTripleProduct(eq2, deriv_mode="fwd"),
+                AspectRatio(eq2, deriv_mode="fwd"),
+                Volume(eq2, deriv_mode="fwd"),
+            ),
+            deriv_mode="looped",
+            use_jit=False,
+        )
     obj3 = ObjectiveFunction(
         (
             QuasisymmetryTripleProduct(eq3, deriv_mode="fwd"),
@@ -1245,9 +1246,10 @@ def test_LinearConstraint_jacobian():
     obj1 = ObjectiveFunction(
         ForceBalance(eq1, deriv_mode="auto"), deriv_mode="batched", use_jit=False
     )
-    obj2 = ObjectiveFunction(
-        ForceBalance(eq2, deriv_mode="fwd"), deriv_mode="looped", use_jit=False
-    )
+    with pytest.warns(DeprecationWarning, match="looped"):
+        obj2 = ObjectiveFunction(
+            ForceBalance(eq2, deriv_mode="fwd"), deriv_mode="looped", use_jit=False
+        )
     obj3 = ObjectiveFunction(
         ForceBalance(eq3, deriv_mode="rev"), deriv_mode="blocked", use_jit=False
     )

From 98c93d500ff1f832e7e533bc781bfaeef7440290 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Mon, 9 Sep 2024 12:22:38 +0900
Subject: [PATCH 25/46] re-implement scan_append, need to be more careful in
 replacing it

---
 desc/utils_batched_vectorize.py | 125 +++++++++++++++++++++++++++++++-
 1 file changed, 123 insertions(+), 2 deletions(-)

diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index 9d0f18050d..900fdd7532 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -113,10 +113,131 @@ def chunk(x, jac_chunk_size=None):
 # Copyright 2021 The NetKet Authors - All rights reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 
+_tree_add = functools.partial(jax.tree_util.tree_map, jax.lax.add)
+_tree_zeros_like = functools.partial(
+    jax.tree_util.tree_map, lambda x: jnp.zeros(x.shape, dtype=x.dtype)
+)
+
+
+# TODO put it somewhere
+def _multimap(f, *args):
+    try:
+        return tuple(map(lambda a: f(*a), zip(*args)))
+    except TypeError:
+        return f(*args)
+
+
+def scan_append_reduce(f, x, append_cond, op=_tree_add, zero_fun=_tree_zeros_like):
+    """Evaluate f element by element in x while appending and/or reducing the results.
+
+    Parameters
+    ----------
+        f: a function that takes elements of the leading dimension of x
+        x: a pytree where each leaf array has the same leading dimension
+        append_cond: a bool (if f returns just one result) or a tuple of
+                     bools (if f returns multiple values)
+                     which indicates whether the individual result should
+                     be appended or reduced
+        op: a function to (pairwise) reduce the specified results. Defaults to a sum.
+        zero_fun: a function which prepares the zero element of op for a given input
+                  shape/dtype tree. Defaults to zeros.
+
+    Returns
+    -------
+        The (tuple of) results corresponding to the output of f
+        where each result is given by:
+        if append_cond is True:
+            a (pytree of) array(s) with leading dimension same as x,
+            containing the evaluation of f at each element in x
+        else (append_cond is False):
+            a (pytree of) array(s) with the same shape as the corresponding
+            output of f, containing the reduction over op of f evaluated at each x
+
+
+    Example:
+
+        import jax.numpy as jnp
+        from netket.jax import scan_append_reduce
+
+        def f(x):
+             y = jnp.sin(x)
+             return y, y, y**2
+
+        N = 100
+        x = jnp.linspace(0.,jnp.pi,N)
+
+        y, s, s2 = scan_append_reduce(f, x, (True, False, False))
+        mean = s/N
+        var = s2/N - mean**2
+    """
+    # TODO: different op for each result
+
+    x0 = jax.tree_util.tree_map(lambda x: x[0], x)
+
+    # special code path if there is only one element
+    # to avoid having to rely on xla/llvm to optimize the overhead away
+    if jax.tree_util.tree_leaves(x)[0].shape[0] == 1:
+        return _multimap(
+            lambda c, x: jnp.expand_dims(x, 0) if c else x, append_cond, f(x0)
+        )
+
+    # the original idea was to use pytrees,
+    # however for now just operate on the return value tuple
+    _get_append_part = functools.partial(
+        _multimap, lambda c, x: x if c else None, append_cond
+    )
+    _get_op_part = functools.partial(
+        _multimap, lambda c, x: x if not c else None, append_cond
+    )
+    _tree_select = functools.partial(
+        _multimap, lambda c, t1, t2: t1 if c else t2, append_cond
+    )
+
+    carry_init = True, _get_op_part(zero_fun(jax.eval_shape(f, x0)))
+
+    def f_(carry, x):
+        is_first, y_carry = carry
+        y = f(x)
+        y_op = _get_op_part(y)
+        y_append = _get_append_part(y)
+        y_reduce = op(y_carry, y_op)
+        return (False, y_reduce), y_append
+
+    (_, res_op), res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
+    # reconstruct the result from the reduced and appended parts in the two trees
+    return _tree_select(res_append, res_op)
+
+
+scan_append = functools.partial(scan_append_reduce, append_cond=True)
+
 
 # TODO in_axes a la vmap?
 def _scanmap(fun, scan_fun, argnums=0):
-    """A helper function to wrap f with a scan_fun."""
+    """A helper function to wrap f with a scan_fun.
+
+    Example
+    -------
+        import jax.numpy as jnp
+        from functools import partial
+
+        from desc.utils import _scanmap, scan_append_reduce
+
+        scan_fun = partial(scan_append_reduce, append_cond=(True, False, False))
+
+        @partial(_scanmap, scan_fun=scan_fun, argnums=1)
+        def f(c, x):
+             y = jnp.sin(x) + c
+             return y, y, y**2
+
+        N = 100
+        x = jnp.linspace(0.,jnp.pi,N)
+        c = 1.
+
+
+        y, s, s2 = f(c, x)
+        mean = s/N
+        var = s2/N - mean**2
+    """
 
     def f_(*args, **kwargs):
         f = lu.wrap_init(fun, kwargs)
@@ -164,7 +285,7 @@ def _get_rest(x):
         args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)]
 
         y_chunks = _unchunk(
-            _scanmap(vmapped_fun, jax.lax.scan, argnums)(*args_chunks, **kwargs)
+            _scanmap(vmapped_fun, scan_append, argnums)(*args_chunks, **kwargs)
         )
 
         if n_rest == 0:

From bc1d49815595fb25a3d697cdb8240d617b0cbc0e Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 10 Sep 2024 12:11:54 +0900
Subject: [PATCH 26/46] simplify function slightly

---
 desc/utils_batched_vectorize.py | 32 +++++++++-----------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index 900fdd7532..8b2c231833 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -122,12 +122,14 @@ def chunk(x, jac_chunk_size=None):
 # TODO put it somewhere
 def _multimap(f, *args):
     try:
+        print("one")
         return tuple(map(lambda a: f(*a), zip(*args)))
     except TypeError:
+        print("two")
         return f(*args)
 
 
-def scan_append_reduce(f, x, append_cond, op=_tree_add, zero_fun=_tree_zeros_like):
+def scan_append(f, x):
     """Evaluate f element by element in x while appending and/or reducing the results.
 
     Parameters
@@ -177,38 +179,22 @@ def f(x):
     # special code path if there is only one element
     # to avoid having to rely on xla/llvm to optimize the overhead away
     if jax.tree_util.tree_leaves(x)[0].shape[0] == 1:
-        return _multimap(
-            lambda c, x: jnp.expand_dims(x, 0) if c else x, append_cond, f(x0)
-        )
+        return _multimap(lambda c, x: jnp.expand_dims(x, 0) if c else x, True, f(x0))
 
     # the original idea was to use pytrees,
     # however for now just operate on the return value tuple
-    _get_append_part = functools.partial(
-        _multimap, lambda c, x: x if c else None, append_cond
-    )
-    _get_op_part = functools.partial(
-        _multimap, lambda c, x: x if not c else None, append_cond
-    )
-    _tree_select = functools.partial(
-        _multimap, lambda c, t1, t2: t1 if c else t2, append_cond
-    )
+    _get_append_part = functools.partial(_multimap, lambda c, x: x if c else None, True)
 
-    carry_init = True, _get_op_part(zero_fun(jax.eval_shape(f, x0)))
+    carry_init = True
 
     def f_(carry, x):
-        is_first, y_carry = carry
         y = f(x)
-        y_op = _get_op_part(y)
         y_append = _get_append_part(y)
-        y_reduce = op(y_carry, y_op)
-        return (False, y_reduce), y_append
+        return False, y_append
 
-    (_, res_op), res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
+    _, res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
     # reconstruct the result from the reduced and appended parts in the two trees
-    return _tree_select(res_append, res_op)
-
-
-scan_append = functools.partial(scan_append_reduce, append_cond=True)
+    return res_append  # _tree_select(res_append, res_op)
 
 
 # TODO in_axes a la vmap?

From f088a061a3e2f4f9c49dbb1b2ec6d12049ca8b45 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 11 Sep 2024 10:14:49 +0900
Subject: [PATCH 27/46] remove print statements

---
 desc/utils_batched_vectorize.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index 8b2c231833..667b294515 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -122,10 +122,8 @@ def chunk(x, jac_chunk_size=None):
 # TODO put it somewhere
 def _multimap(f, *args):
     try:
-        print("one")
         return tuple(map(lambda a: f(*a), zip(*args)))
     except TypeError:
-        print("two")
         return f(*args)
 
 

From 7c804a1f9bfb53c3d221d27e9ddc4bcfaaba7999 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 11 Sep 2024 14:41:35 +0900
Subject: [PATCH 28/46] simplify scan further

---
 desc/objectives/objective_funs.py |   8 +-
 desc/utils_batched_vectorize.py   | 157 +++++++-----------------------
 2 files changed, 40 insertions(+), 125 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 4f639a539b..21496d54ef 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -559,12 +559,12 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         if len(v) == 1:
             jvpfun = lambda dx: Derivative.compute_jvp(fun, 0, dx, x)
             return batched_vectorize(
-                jvpfun, signature="(n)->(k)", jac_chunk_size=self._jac_chunk_size
+                jvpfun, signature="(n)->(k)", chunk_size=self._jac_chunk_size
             )(v[0])
         elif len(v) == 2:
             jvpfun = lambda dx1, dx2: Derivative.compute_jvp2(fun, 0, 0, dx1, dx2, x)
             return batched_vectorize(
-                jvpfun, signature="(n),(n)->(k)", jac_chunk_size=self._jac_chunk_size
+                jvpfun, signature="(n),(n)->(k)", chunk_size=self._jac_chunk_size
             )(v[0], v[1])
         elif len(v) == 3:
             jvpfun = lambda dx1, dx2, dx3: Derivative.compute_jvp3(
@@ -573,7 +573,7 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
             return batched_vectorize(
                 jvpfun,
                 signature="(n),(n),(n)->(k)",
-                jac_chunk_size=self._jac_chunk_size,
+                chunk_size=self._jac_chunk_size,
             )(v[0], v[1], v[2])
         else:
             raise NotImplementedError("Cannot compute JVP higher than 3rd order.")
@@ -1164,7 +1164,7 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"):
         jvpfun = lambda *dx: Derivative.compute_jvp(fun, tuple(range(len(x))), dx, *x)
         sig = ",".join(f"(n{i})" for i in range(len(x))) + "->(k)"
         return batched_vectorize(
-            jvpfun, signature=sig, jac_chunk_size=self._jac_chunk_size
+            jvpfun, signature=sig, chunk_size=self._jac_chunk_size
         )(*v)
 
     @jit
diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index 667b294515..530f847eaf 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -38,29 +38,29 @@ def _unchunk(x):
 
 
 @_treeify
-def _chunk(x, jac_chunk_size=None):
-    # jac_chunk_size=None -> add just a dummy chunk dimension,
+def _chunk(x, chunk_size=None):
+    # chunk_size=None -> add just a dummy chunk dimension,
     #  same as np.expand_dims(x, 0)
     if x.ndim == 0:
         raise ValueError("x cannot be chunked as it has 0 dimensions.")
     n = x.shape[0]
-    if jac_chunk_size is None:
-        jac_chunk_size = n
+    if chunk_size is None:
+        chunk_size = n
 
-    n_chunks, residual = divmod(n, jac_chunk_size)
+    n_chunks, residual = divmod(n, chunk_size)
     if residual != 0:
         raise ValueError(
-            "The first dimension of x must be divisible by jac_chunk_size."
-            + f"\n        Got x.shape={x.shape} but jac_chunk_size={jac_chunk_size}."
+            "The first dimension of x must be divisible by chunk_size."
+            + f"\n        Got x.shape={x.shape} but chunk_size={chunk_size}."
         )
-    return x.reshape((n_chunks, jac_chunk_size) + x.shape[1:])
+    return x.reshape((n_chunks, chunk_size) + x.shape[1:])
 
 
-def _jac_chunk_size(x):
+def _chunk_size(x):
     b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
     if len(b) != 1:
         raise ValueError(
-            "The arrays in x have inconsistent jac_chunk_size or number of chunks"
+            "The arrays in x have inconsistent chunk_size or number of chunks"
         )
     return b.pop()[1]
 
@@ -80,27 +80,27 @@ def unchunk(x_chunked):
 
     """
     return _unchunk(x_chunked), functools.partial(
-        _chunk, jac_chunk_size=_jac_chunk_size(x_chunked)
+        _chunk, chunk_size=_chunk_size(x_chunked)
     )
 
 
-def chunk(x, jac_chunk_size=None):
+def chunk(x, chunk_size=None):
     """Split an array (or a pytree of arrays) into chunks along the first axis.
 
     Parameters
     ----------
         x: an array (or pytree of arrays)
-        jac_chunk_size: an integer or None (default)
-            The first axis in x must be a multiple of jac_chunk_size
+        chunk_size: an integer or None (default)
+            The first axis in x must be a multiple of chunk_size
 
     Returns
     -------
     (x_chunked, unchunk_fn): tuple
-        - x_chunked is x reshaped to (-1, jac_chunk_size)+x.shape[1:]
-          if jac_chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
+        - x_chunked is x reshaped to (-1, chunk_size)+x.shape[1:]
+          if chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
         - unchunk_fn is a function which restores x given x_chunked
     """
-    return _chunk(x, jac_chunk_size), _unchunk
+    return _chunk(x, chunk_size), _unchunk
 
 
 ####
@@ -113,115 +113,32 @@ def chunk(x, jac_chunk_size=None):
 # Copyright 2021 The NetKet Authors - All rights reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 
-_tree_add = functools.partial(jax.tree_util.tree_map, jax.lax.add)
-_tree_zeros_like = functools.partial(
-    jax.tree_util.tree_map, lambda x: jnp.zeros(x.shape, dtype=x.dtype)
-)
-
-
-# TODO put it somewhere
-def _multimap(f, *args):
-    try:
-        return tuple(map(lambda a: f(*a), zip(*args)))
-    except TypeError:
-        return f(*args)
-
 
 def scan_append(f, x):
-    """Evaluate f element by element in x while appending and/or reducing the results.
+    """Evaluate f element by element in x while appending the results.
 
     Parameters
     ----------
         f: a function that takes elements of the leading dimension of x
         x: a pytree where each leaf array has the same leading dimension
-        append_cond: a bool (if f returns just one result) or a tuple of
-                     bools (if f returns multiple values)
-                     which indicates whether the individual result should
-                     be appended or reduced
-        op: a function to (pairwise) reduce the specified results. Defaults to a sum.
-        zero_fun: a function which prepares the zero element of op for a given input
-                  shape/dtype tree. Defaults to zeros.
 
     Returns
     -------
-        The (tuple of) results corresponding to the output of f
-        where each result is given by:
-        if append_cond is True:
-            a (pytree of) array(s) with leading dimension same as x,
-            containing the evaluation of f at each element in x
-        else (append_cond is False):
-            a (pytree of) array(s) with the same shape as the corresponding
-            output of f, containing the reduction over op of f evaluated at each x
-
-
-    Example:
-
-        import jax.numpy as jnp
-        from netket.jax import scan_append_reduce
-
-        def f(x):
-             y = jnp.sin(x)
-             return y, y, y**2
-
-        N = 100
-        x = jnp.linspace(0.,jnp.pi,N)
-
-        y, s, s2 = scan_append_reduce(f, x, (True, False, False))
-        mean = s/N
-        var = s2/N - mean**2
+        a (pytree of) array(s) with leading dimension same as x,
+        containing the evaluation of f at each element in x
     """
-    # TODO: different op for each result
-
-    x0 = jax.tree_util.tree_map(lambda x: x[0], x)
-
-    # special code path if there is only one element
-    # to avoid having to rely on xla/llvm to optimize the overhead away
-    if jax.tree_util.tree_leaves(x)[0].shape[0] == 1:
-        return _multimap(lambda c, x: jnp.expand_dims(x, 0) if c else x, True, f(x0))
-
-    # the original idea was to use pytrees,
-    # however for now just operate on the return value tuple
-    _get_append_part = functools.partial(_multimap, lambda c, x: x if c else None, True)
-
     carry_init = True
 
     def f_(carry, x):
-        y = f(x)
-        y_append = _get_append_part(y)
-        return False, y_append
+        return False, f(x)
 
     _, res_append = jax.lax.scan(f_, carry_init, x, unroll=1)
-    # reconstruct the result from the reduced and appended parts in the two trees
-    return res_append  # _tree_select(res_append, res_op)
+    return res_append
 
 
 # TODO in_axes a la vmap?
 def _scanmap(fun, scan_fun, argnums=0):
-    """A helper function to wrap f with a scan_fun.
-
-    Example
-    -------
-        import jax.numpy as jnp
-        from functools import partial
-
-        from desc.utils import _scanmap, scan_append_reduce
-
-        scan_fun = partial(scan_append_reduce, append_cond=(True, False, False))
-
-        @partial(_scanmap, scan_fun=scan_fun, argnums=1)
-        def f(c, x):
-             y = jnp.sin(x) + c
-             return y, y, y**2
-
-        N = 100
-        x = jnp.linspace(0.,jnp.pi,N)
-        c = 1.
-
-
-        y, s, s2 = f(c, x)
-        mean = s/N
-        var = s2/N - mean**2
-    """
+    """A helper function to wrap f with a scan_fun."""
 
     def f_(*args, **kwargs):
         f = lu.wrap_init(fun, kwargs)
@@ -242,11 +159,11 @@ def f_(*args, **kwargs):
 # Licensed under the Apache License, Version 2.0 (the "License");
 
 
-def _eval_fun_in_chunks(vmapped_fun, jac_chunk_size, argnums, *args, **kwargs):
+def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs):
     n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0]
-    n_chunks, n_rest = divmod(n_elements, jac_chunk_size)
+    n_chunks, n_rest = divmod(n_elements, chunk_size)
 
-    if n_chunks == 0 or jac_chunk_size >= n_elements:
+    if n_chunks == 0 or chunk_size >= n_elements:
         y = vmapped_fun(*args, **kwargs)
     else:
         # split inputs
@@ -254,7 +171,7 @@ def _get_chunks(x):
             x_chunks = jax.tree_util.tree_map(
                 lambda x_: x_[: n_elements - n_rest, ...], x
             )
-            x_chunks = _chunk(x_chunks, jac_chunk_size)
+            x_chunks = _chunk(x_chunks, chunk_size)
             return x_chunks
 
         def _get_rest(x):
@@ -284,16 +201,16 @@ def _get_rest(x):
 
 def _chunk_vmapped_function(
     vmapped_fun: Callable,
-    jac_chunk_size: Optional[int],
+    chunk_size: Optional[int],
     argnums=0,
 ) -> Callable:
     """Takes a vmapped function and computes it in chunks."""
-    if jac_chunk_size is None:
+    if chunk_size is None:
         return vmapped_fun
 
     if isinstance(argnums, int):
         argnums = (argnums,)
-    return functools.partial(_eval_fun_in_chunks, vmapped_fun, jac_chunk_size, argnums)
+    return functools.partial(_eval_fun_in_chunks, vmapped_fun, chunk_size, argnums)
 
 
 def _parse_in_axes(in_axes):
@@ -313,7 +230,7 @@ def vmap_chunked(
     f: Callable,
     in_axes=0,
     *,
-    jac_chunk_size: Optional[int],
+    chunk_size: Optional[int],
 ) -> Callable:
     """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks.
 
@@ -321,7 +238,7 @@ def vmap_chunked(
     ----------
         f: The function to be vectorised.
         in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        jac_chunk_size: The maximum size of the chunks to be used. If it is `None`,
+        chunk_size: The maximum size of the chunks to be used. If it is `None`,
             chunking is disabled
 
 
@@ -331,12 +248,10 @@ def vmap_chunked(
     """
     in_axes, argnums = _parse_in_axes(in_axes)
     vmapped_fun = jax.vmap(f, in_axes=in_axes)
-    return _chunk_vmapped_function(vmapped_fun, jac_chunk_size, argnums)
+    return _chunk_vmapped_function(vmapped_fun, chunk_size, argnums)
 
 
-def batched_vectorize(
-    pyfunc, *, excluded=frozenset(), signature=None, jac_chunk_size=None
-):
+def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None):
     """Define a vectorized function with broadcasting and batching.
 
     below is taken from JAX
@@ -366,7 +281,7 @@ def batched_vectorize(
         provided, ``pyfunc`` will be called with (and expected to return) arrays
         with shapes given by the size of corresponding core dimensions. By
         default, pyfunc is assumed to take scalars arrays as input and output.
-        jac_chunk_size: the size of the batches to pass to vmap. if 1, will only
+        chunk_size: the size of the batches to pass to vmap. if 1, will only
 
     Returns
     -------
@@ -448,7 +363,7 @@ def wrapped(*args, **kwargs):
             else:
                 # change the vmap here to chunked_vmap
                 vectorized_func = vmap_chunked(
-                    vectorized_func, in_axes, jac_chunk_size=jac_chunk_size
+                    vectorized_func, in_axes, chunk_size=chunk_size
                 )
         result = vectorized_func(*squeezed_args)
 

From 480a4a7080f0578aa8ecfa53217ad13eec561b50 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 11 Sep 2024 14:53:44 +0900
Subject: [PATCH 29/46] add doc for reducing memory usage

---
 docs/adding_objectives.rst | 49 +++++++++-----------------------------
 docs/index.rst             |  1 +
 docs/memory_usage.rst      | 26 ++++++++++++++++++++
 3 files changed, 38 insertions(+), 38 deletions(-)
 create mode 100644 docs/memory_usage.rst

diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index d97c31fbfa..7cae6b4104 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -71,17 +71,17 @@ A full example objective with comments describing the key points is given below:
         name : str, optional
             Name of the objective function.
         jac_chunk_size : int, optional
-            If `"blocked"` deriv_mode is used in the ObjectiveFunction, will
-            calculate the Jacobian for this objective ``jac_chunk_size`` columns at a time,
-            instead of all at once.  The memory usage of the Jacobian calculation is
-            linearly proportional to ``jac_chunk_size``: the smaller the ``jac_chunk_size``, the
-            less memory the Jacobian calculation will require (with some baseline memory
-            usage). The time to compute the Jacobian is roughly ``t ~1/jac_chunk_size``
-            with some baseline time, so the larger the ``jac_chunk_size``, the faster the
-            calculation takes, at the cost of requiring more memory. A ``jac_chunk_size``
-            of 1 corresponds to the least memory intensive,  but slowest method of
-            calculating the Jacobian.
-            If None, it will default to the largest possible `jac_chunk_size` i.e. ``dim_x``
+            Will calculate the Jacobian for this objective ``jac_chunk_size``
+            columns at a time, instead of all at once.  The memory usage of the
+            Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
+            the smaller the chunk size, the less memory the Jacobian calculation
+            will require (with some baseline memory usage). The time to compute the
+            Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
+            ``jac_chunk_size``, the faster the calculation takes, at the cost of
+            requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
+            memory intensive,  but slowest method of calculating the Jacobian.
+            If None, ``jac_chunk_size`` will default to
+            ``np.ceil(dim_x/4)``.
 
         """
 
@@ -248,30 +248,3 @@ available for the ``loss_function`` kwarg are ``[None,"mean","min","max"]``, wit
 ``None`` meaning using the usual default objective cost, while ``"mean"`` takes the
 average of the raw objective values (before subtracting the target/bounds or
 normalization), ``"min"`` takes the minimum, and ``"max"`` takes the maximum.
-
-Reducing Memory Size of Objective Jacobian Calculation
-------------------------------------------------------
-
-During optimization, one of the most memory-intensive steps is the calculation of the Jacobian
-of the cost function. This memory cost comes from attempting to calculate the entire Jacobian
-matrix in one vectorized operation. However, this can be tuned between high memory usage but quick (default)
-and low memory usage but slower with the ``jac_chunk_size`` keyword argument. By default, where this matters
-is when creating the overall ``ObjectiveFunction`` to be used in the optimization (where by default ``deriv_mode="batched"``). The Jacobian is a
-matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jacobian is vectorized over
-the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``jac_chunk_size`` attribute allows one to split up
-the vectorized computation into chunks of ``jac_chunk_size`` columns at a time, allowing one to compute the Jacobian
-in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is
-linearly proportional to ``jac_chunk_size``: the smaller the ``jac_chunk_size``, the less memory the Jacobian calculation will
-require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t ~1/jac_chunk_size``
-with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
-at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,
-but slowest method of calculating the Jacobian. If ``jac_chunk_size=None``, it will default to the largest
-possible `jac_chunk_size` i.e. ``obj.dim_x``.
-
-If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
-be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
-the ``jac_chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
-For example, if ``obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)``, ``obj2 = MeanCurvature(eq, jac_chunk_size=2000)``
-and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with a
-``jac_chunk_size=100`` for the quasisymmetry part and a ``jac_chunk_size=2000`` for the curvature part, then the full Jacobian
-will be formed as a block diagonal matrix with the individual Jacobians of these two objectives.
diff --git a/docs/index.rst b/docs/index.rst
index dceb4b6787..0cfbeb9bff 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -32,6 +32,7 @@
    notebooks/tutorials/nae_constraint.ipynb
    notebooks/tutorials/bootstrap_current.ipynb
    notebooks/tutorials/coil_stage_two_optimization.ipynb
+   memory_usage
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/memory_usage.rst b/docs/memory_usage.rst
new file mode 100644
index 0000000000..f9cdf7d964
--- /dev/null
+++ b/docs/memory_usage.rst
@@ -0,0 +1,26 @@
+
+Reducing Memory Size of Objective Jacobian Calculation
+------------------------------------------------------
+
+During optimization, one of the most memory-intensive steps is the calculation of the Jacobian
+of the cost function. This memory cost comes from attempting to calculate the entire Jacobian
+matrix in one vectorized operation. However, this can be tuned between high memory usage but quick (default)
+and low memory usage but slower with the ``jac_chunk_size`` keyword argument. By default, where this matters
+is when creating the overall ``ObjectiveFunction`` to be used in the optimization (where by default ``deriv_mode="batched"``). The Jacobian is a
+matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jacobian is vectorized over
+the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``jac_chunk_size`` attribute allows one to split up
+the vectorized computation into chunks of ``jac_chunk_size`` columns at a time, allowing one to compute the Jacobian
+in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is
+``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, the less memory the Jacobian calculation
+will require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t=t0 +t1/jac_chunk_size``
+with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
+at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,
+but slowest method of calculating the Jacobian. If ``jac_chunk_size=None``, it will default to ``obj.dim_x/4``.
+
+If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
+be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
+the ``jac_chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
+For example, if ``obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)``, ``obj2 = MeanCurvature(eq, jac_chunk_size=2000)``
+and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with a
+``jac_chunk_size=100`` for the quasisymmetry part and a ``jac_chunk_size=2000`` for the curvature part, then the full Jacobian
+will be formed as a block diagonal matrix with the individual Jacobians of these two objectives.

From 15a7f48b5839ed2d1e0e19aa7b61dd8344fd1a76 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 11 Sep 2024 15:59:54 +0900
Subject: [PATCH 30/46] change constraint wrapper to use batched

---
 desc/objectives/objective_funs.py     |  4 ++--
 desc/optimize/_constraint_wrappers.py | 13 ++++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 21496d54ef..d074187ed8 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -163,7 +163,7 @@ def build(self, use_jit=None, verbose=1):
             np.any(sub_obj_jac_chunk_sizes) and self._deriv_mode != "blocked",
             UserWarning,
             "'jac_chunk_size' was passed into one or more sub-objectives, but the"
-            "ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
+            " ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
             "'jac_chunk_size' will be ignored in favor of the ObjectiveFunction's "
             f"'jac_chunk_size' of {self._jac_chunk_size}."
             " Specify 'blocked' deriv_mode if each sub-objective is desired to have a "
@@ -172,7 +172,7 @@ def build(self, use_jit=None, verbose=1):
         warnif(
             self._jac_chunk_size is not None and self._deriv_mode == "blocked",
             UserWarning,
-            "'jac_chunk_size' was passed into ObjectiveFunction, but the"
+            "'jac_chunk_size' was passed into ObjectiveFunction, but the "
             "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective "
             "'jac_chunk_size' are used to compute each sub-objective's Jacobian, "
             "`ignoring the ObjectiveFunction's 'jac_chunk_size'.",
diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py
index d187b28205..2e38700585 100644
--- a/desc/optimize/_constraint_wrappers.py
+++ b/desc/optimize/_constraint_wrappers.py
@@ -14,6 +14,7 @@
 )
 from desc.objectives.utils import factorize_linear_constraints
 from desc.utils import Timer, errorif, get_instance, setdefault
+from desc.utils_batched_vectorize import batched_vectorize
 
 from .utils import f_where_x
 
@@ -978,7 +979,9 @@ def jvp_scaled(self, v, x, constants=None):
         constants = setdefault(constants, self.constants)
         xg, xf = self._update_equilibrium(x, store=True)
         jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="scaled")
-        return jnp.vectorize(jvpfun, signature="(n)->(k)")(v)
+        return batched_vectorize(
+            jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size
+        )(v)
 
     def jvp_scaled_error(self, v, x, constants=None):
         """Compute Jacobian-vector product of self.compute_scaled_error.
@@ -998,7 +1001,9 @@ def jvp_scaled_error(self, v, x, constants=None):
         constants = setdefault(constants, self.constants)
         xg, xf = self._update_equilibrium(x, store=True)
         jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="scaled_error")
-        return jnp.vectorize(jvpfun, signature="(n)->(k)")(v)
+        return batched_vectorize(
+            jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size
+        )(v)
 
     def jvp_unscaled(self, v, x, constants=None):
         """Compute Jacobian-vector product of self.compute_unscaled.
@@ -1018,7 +1023,9 @@ def jvp_unscaled(self, v, x, constants=None):
         constants = setdefault(constants, self.constants)
         xg, xf = self._update_equilibrium(x, store=True)
         jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="unscaled")
-        return jnp.vectorize(jvpfun, signature="(n)->(k)")(v)
+        return batched_vectorize(
+            jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size
+        )(v)
 
     def _jvp(self, v, xf, xg, constants, op):
         # we're replacing stuff like this with jvps

From ea74b247021c64310cff1eb89bfc07089fd07bf4 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 12 Sep 2024 09:47:30 +0900
Subject: [PATCH 31/46] address some comments

---
 desc/objectives/_bootstrap.py     |  8 ++--
 desc/objectives/_coils.py         | 72 +++++++++++++++----------------
 desc/objectives/_equilibrium.py   | 48 ++++++++++-----------
 desc/objectives/_free_boundary.py | 24 +++++------
 desc/objectives/_generic.py       | 16 +++----
 desc/objectives/_geometry.py      | 64 +++++++++++++--------------
 desc/objectives/_omnigenity.py    | 40 ++++++++---------
 desc/objectives/_power_balance.py | 16 +++----
 desc/objectives/_profiles.py      | 32 +++++++-------
 desc/objectives/_stability.py     | 16 +++----
 desc/objectives/getters.py        |  9 +---
 desc/objectives/objective_funs.py |  4 +-
 desc/utils_batched_vectorize.py   | 57 +++++++++++-------------
 docs/adding_objectives.rst        |  8 ++--
 docs/memory_usage.rst             |  2 +-
 15 files changed, 201 insertions(+), 215 deletions(-)

diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py
index 0933fc14be..71e890b73c 100644
--- a/desc/objectives/_bootstrap.py
+++ b/desc/objectives/_bootstrap.py
@@ -68,15 +68,15 @@ class BootstrapRedlConsistency(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py
index 565bf3bdd8..4e4fc77eb5 100644
--- a/desc/objectives/_coils.py
+++ b/desc/objectives/_coils.py
@@ -61,15 +61,15 @@ class _CoilObjective(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -271,15 +271,15 @@ class CoilLength(_CoilObjective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -411,15 +411,15 @@ class CoilCurvature(_CoilObjective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -546,15 +546,15 @@ class CoilTorsion(_CoilObjective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -681,15 +681,15 @@ class CoilCurrentLength(CoilLength):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -824,15 +824,15 @@ class CoilSetMinDistance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1013,15 +1013,15 @@ class PlasmaCoilSetMinDistance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1258,15 +1258,15 @@ class QuadraticFlux(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
@@ -1482,15 +1482,15 @@ class ToroidalFlux(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index bb2c4dc7a7..f9fcdc7f86 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -65,15 +65,15 @@ class ForceBalance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
@@ -251,15 +251,15 @@ class ForceBalanceAnisotropic(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -424,15 +424,15 @@ class RadialForceBalance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -597,15 +597,15 @@ class HelicalForceBalance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -768,15 +768,15 @@ class Energy(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -950,15 +950,15 @@ class CurrentDensity(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py
index 2a923f03ea..f0240ec408 100644
--- a/desc/objectives/_free_boundary.py
+++ b/desc/objectives/_free_boundary.py
@@ -75,15 +75,15 @@ class VacuumBoundaryError(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -427,15 +427,15 @@ class BoundaryError(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -899,15 +899,15 @@ class BoundaryErrorNESTOR(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py
index 9dc7b05f6f..554b06fc6b 100644
--- a/desc/objectives/_generic.py
+++ b/desc/objectives/_generic.py
@@ -59,15 +59,15 @@ class GenericObjective(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -361,15 +361,15 @@ class ObjectiveFromUser(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index 8eb7d9b8eb..2ea7a7b6b2 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -55,15 +55,15 @@ class AspectRatio(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -232,15 +232,15 @@ class Elongation(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -408,15 +408,15 @@ class Volume(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -633,15 +633,15 @@ class PlasmaVesselDistance(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
@@ -986,15 +986,15 @@ class MeanCurvature(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1161,15 +1161,15 @@ class PrincipalCurvature(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1331,15 +1331,15 @@ class BScaleLength(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1497,15 +1497,15 @@ class GoodCoordinates(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py
index 7c72d7f3d3..0a90c719b0 100644
--- a/desc/objectives/_omnigenity.py
+++ b/desc/objectives/_omnigenity.py
@@ -59,15 +59,15 @@ class QuasisymmetryBoozer(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -286,15 +286,15 @@ class QuasisymmetryTwoTerm(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -486,15 +486,15 @@ class QuasisymmetryTripleProduct(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -679,15 +679,15 @@ class Omnigenity(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -1029,15 +1029,15 @@ class Isodynamicity(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_power_balance.py b/desc/objectives/_power_balance.py
index 63faf689c7..459b0c4912 100644
--- a/desc/objectives/_power_balance.py
+++ b/desc/objectives/_power_balance.py
@@ -58,15 +58,15 @@ class FusionPower(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
@@ -257,15 +257,15 @@ class HeatingPowerISS04(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
     """
diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py
index de9926d3ab..3100b61f02 100644
--- a/desc/objectives/_profiles.py
+++ b/desc/objectives/_profiles.py
@@ -55,15 +55,15 @@ class Pressure(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -228,15 +228,15 @@ class RotationalTransform(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -414,15 +414,15 @@ class Shear(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -596,15 +596,15 @@ class ToroidalCurrent(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index 1520b73e20..02b8bce259 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -65,15 +65,15 @@ class MercierStability(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
@@ -261,15 +261,15 @@ class MagneticWell(_Objective):
         Name of the objective function.
     jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
-        columns at a time, instead of all at once.  The memory usage of the
+        columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-        the higher the chunk size, the less memory the Jacobian calculation
+        the smaller the chunk size, the less memory the Jacobian calculation
         will require (with some baseline memory usage). The time to compute the
         Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-        memory intensive,  but slowest method of calculating the Jacobian.
-        If None, it will default to the largest possible
+        memory intensive, but slowest method of calculating the Jacobian.
+        If None, it will default to a conservative default
         `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
 
diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index a0cc959676..3fe98abaf1 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -44,9 +44,7 @@
 }
 
 
-def get_equilibrium_objective(
-    eq, mode="force", normalize=True, jac_chunk_size=None, **kwargs
-):
+def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size=None):
     """Get the objective function for a typical force balance equilibrium problem.
 
     Parameters
@@ -68,10 +66,7 @@ def get_equilibrium_objective(
         An objective function with default force balance objectives.
 
     """
-    kwargs = {
-        **{"eq": eq, "normalize": normalize, "normalize_target": normalize},
-        **kwargs,
-    }
+    kwargs = {"eq": eq, "normalize": normalize, "normalize_target": normalize}
     if mode == "energy":
         objectives = Energy(**kwargs)
     elif mode == "force":
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index d074187ed8..591f3155af 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -46,8 +46,8 @@ class ObjectiveFunction(IOAble):
     jac_chunk_size : int, optional
         If `"batched"` deriv_mode is used, will calculate the Jacobian
         ``jac_chunk_size`` columns at a time, instead of all at once.
-        The memory usage of the Jacobian calculation is roughly
-        ``memory usage = m0 + m1*jac_chunk_size``: the higher the chunk size,
+        The memory usage of the Jacobian calculation is roughly
+        ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size,
         the less memory the Jacobian calculation will require (with some baseline
         memory usage). The time it takes to compute the Jacobian is roughly
         ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster
diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index 530f847eaf..d6ae9f7806 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -89,9 +89,9 @@ def chunk(x, chunk_size=None):
 
     Parameters
     ----------
-        x: an array (or pytree of arrays)
-        chunk_size: an integer or None (default)
-            The first axis in x must be a multiple of chunk_size
+    x: an array (or pytree of arrays)
+    chunk_size: an integer or None (default)
+        The first axis in x must be a multiple of chunk_size
 
     Returns
     -------
@@ -108,10 +108,6 @@ def chunk(x, chunk_size=None):
 # The following section of this code is derived from the NetKet project
 # https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
 # netket/jax/_scanmap.py
-#
-# The original copyright notice is as follows
-# Copyright 2021 The NetKet Authors - All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
 
 
 def scan_append(f, x):
@@ -119,13 +115,13 @@ def scan_append(f, x):
 
     Parameters
     ----------
-        f: a function that takes elements of the leading dimension of x
-        x: a pytree where each leaf array has the same leading dimension
+    f: a function that takes elements of the leading dimension of x
+    x: a pytree where each leaf array has the same leading dimension
 
     Returns
     -------
-        a (pytree of) array(s) with leading dimension same as x,
-        containing the evaluation of f at each element in x
+    a (pytree of) array(s) with leading dimension same as x,
+    containing the evaluation of f at each element in x
     """
     carry_init = True
 
@@ -153,10 +149,6 @@ def f_(*args, **kwargs):
 # The following section of this code is derived from the NetKet project
 # https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/
 # netket/jax/_vmap_chunked.py
-#
-# The original copyright notice is as follows
-# Copyright 2021 The NetKet Authors - All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
 
 
 def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs):
@@ -236,15 +228,15 @@ def vmap_chunked(
 
     Parameters
     ----------
-        f: The function to be vectorised.
-        in_axes: The axes that should be scanned along. Only supports `0` or `None`
-        chunk_size: The maximum size of the chunks to be used. If it is `None`,
-            chunking is disabled
+    f: The function to be vectorised.
+    in_axes: The axes that should be scanned along. Only supports `0` or `None`
+    chunk_size: The maximum size of the chunks to be used. If it is `None`,
+        chunking is disabled
 
 
     Returns
     -------
-        f: A vectorised and chunked function
+    f: A vectorised and chunked function
     """
     in_axes, argnums = _parse_in_axes(in_axes)
     vmapped_fun = jax.vmap(f, in_axes=in_axes)
@@ -254,8 +246,6 @@ def vmap_chunked(
 def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None):
     """Define a vectorized function with broadcasting and batching.
 
-    below is taken from JAX
-    FIXME: change restof docstring
     :func:`vectorize` is a convenience wrapper for defining vectorized
     functions with broadcasting, in the style of NumPy's
     `generalized universal functions
@@ -272,20 +262,21 @@ def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_siz
 
     Parameters
     ----------
-        pyfunc: function to vectorize.
-        excluded: optional set of integers representing positional arguments for
-        which the function will not be vectorized. These will be passed directly
-        to ``pyfunc`` unmodified.
-        signature: optional generalized universal function signature, e.g.,
-        ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
-        provided, ``pyfunc`` will be called with (and expected to return) arrays
-        with shapes given by the size of corresponding core dimensions. By
-        default, pyfunc is assumed to take scalars arrays as input and output.
-        chunk_size: the size of the batches to pass to vmap. if 1, will only
+    pyfunc: function to vectorize.
+    excluded: optional set of integers representing positional arguments for
+    which the function will not be vectorized. These will be passed directly
+    to ``pyfunc`` unmodified.
+    signature: optional generalized universal function signature, e.g.,
+    ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If
+    provided, ``pyfunc`` will be called with (and expected to return) arrays
+    with shapes given by the size of corresponding core dimensions. By
+    default, pyfunc is assumed to take scalars arrays as input and output.
+    chunk_size: the size of the batches to pass to vmap. If None, defaults to
+    the largest possible chunk_size (like the default behavior of ``vectorize``)
 
     Returns
     -------
-        Vectorized version of the given function.
+    Batch-vectorized version of the given function.
 
     """
     if any(not isinstance(exclude, (str, int)) for exclude in excluded):
diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index 7cae6b4104..2b2cfd196c 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -72,15 +72,15 @@ A full example objective with comments describing the key points is given below:
             Name of the objective function.
         jac_chunk_size : int, optional
             Will calculate the Jacobian for this objective ``jac_chunk_size``
-            columns at a time, instead of all at once.  The memory usage of the
+            columns at a time, instead of all at once. The memory usage of the
             Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
-            the higher the chunk size, the less memory the Jacobian calculation
+            the smaller the chunk size, the less memory the Jacobian calculation
             will require (with some baseline memory usage). The time to compute the
             Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the
             ``jac_chunk_size``, the faster the calculation takes, at the cost of
             requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
-            memory intensive,  but slowest method of calculating the Jacobian.
-            If None, it will default to the largest possible
+            memory intensive, but slowest method of calculating the Jacobian.
+            If None, it will default to a conservative default
             `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
 
         """
diff --git a/docs/memory_usage.rst b/docs/memory_usage.rst
index f9cdf7d964..f2fdc7adad 100644
--- a/docs/memory_usage.rst
+++ b/docs/memory_usage.rst
@@ -11,7 +11,7 @@ matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jaco
 the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``jac_chunk_size`` attribute allows one to split up
 the vectorized computation into chunks of ``jac_chunk_size`` columns at a time, allowing one to compute the Jacobian
 in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is
-``memory usage = m0 + m1*jac_chunk_size``: the higher the chunk size, the less memory the Jacobian calculation
+``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, the less memory the Jacobian calculation
 will require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t=t0 +t1/jac_chunk_size``
 with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
 at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,

From 83c39b472b60e73d5e69e5b74febc97e12a23b8b Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 12 Sep 2024 11:07:41 +0900
Subject: [PATCH 32/46] update chunk size to auto as default

---
 desc/continuation.py              | 10 ++--
 desc/objectives/_bootstrap.py     |  9 ++--
 desc/objectives/_coils.py         | 81 +++++++++++++++++--------------
 desc/objectives/_equilibrium.py   | 54 ++++++++++++---------
 desc/objectives/_free_boundary.py | 27 ++++++-----
 desc/objectives/_generic.py       | 20 ++++----
 desc/objectives/_geometry.py      | 72 +++++++++++++++------------
 desc/objectives/_omnigenity.py    | 45 +++++++++--------
 desc/objectives/_power_balance.py | 18 ++++---
 desc/objectives/_profiles.py      | 36 ++++++++------
 desc/objectives/_stability.py     | 18 ++++---
 desc/objectives/getters.py        |  4 +-
 desc/objectives/objective_funs.py | 49 +++++++++++++------
 docs/adding_objectives.rst        |  4 +-
 docs/memory_usage.rst             |  2 +-
 tests/test_objective_funs.py      |  9 ++--
 16 files changed, 261 insertions(+), 197 deletions(-)

diff --git a/desc/continuation.py b/desc/continuation.py
index e4d38ab72f..b5274171fe 100644
--- a/desc/continuation.py
+++ b/desc/continuation.py
@@ -29,7 +29,7 @@ def _solve_axisym(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    jac_chunk_size=None,
+    jac_chunk_size="auto",
 ):
     """Solve initial axisymmetric case with adaptive step sizing."""
     timer = Timer()
@@ -199,7 +199,7 @@ def _add_pressure(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    jac_chunk_size=None,
+    jac_chunk_size="auto",
 ):
     """Add pressure with adaptive step sizing."""
     timer = Timer()
@@ -330,7 +330,7 @@ def _add_shaping(
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    jac_chunk_size=None,
+    jac_chunk_size="auto",
 ):
     """Add 3D shaping with adaptive step sizing."""
     timer = Timer()
@@ -460,7 +460,7 @@ def solve_continuation_automatic(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    jac_chunk_size=None,
+    jac_chunk_size="auto",
     **kwargs,
 ):
     """Solve for an equilibrium using an automatic continuation method.
@@ -640,7 +640,7 @@ def solve_continuation(  # noqa: C901
     maxiter=100,
     verbose=1,
     checkpoint_path=None,
-    jac_chunk_size=None,
+    jac_chunk_size="auto",
 ):
     """Solve for an equilibrium by continuation method.
 
diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py
index 71e890b73c..7c73b10227 100644
--- a/desc/objectives/_bootstrap.py
+++ b/desc/objectives/_bootstrap.py
@@ -66,7 +66,7 @@ class BootstrapRedlConsistency(_Objective):
         or quasi-axisymmetry; set to +/-NFP for quasi-helical symmetry.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -76,8 +76,9 @@ class BootstrapRedlConsistency(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
     """
 
@@ -98,7 +99,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="Bootstrap current self-consistency (Redl)",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py
index 4e4fc77eb5..0fdeeefc9a 100644
--- a/desc/objectives/_coils.py
+++ b/desc/objectives/_coils.py
@@ -59,7 +59,7 @@ class _CoilObjective(_Objective):
         If a list, must have the same structure as coil.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -69,8 +69,9 @@ class _CoilObjective(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -88,7 +89,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name=None,
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         self._grid = grid
         self._data_keys = data_keys
@@ -269,7 +270,7 @@ class CoilLength(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -279,8 +280,9 @@ class CoilLength(_CoilObjective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -301,7 +303,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil length",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 2 * np.pi
@@ -409,7 +411,7 @@ class CoilCurvature(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -419,8 +421,9 @@ class CoilCurvature(_CoilObjective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -441,7 +444,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil curvature",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (0, 1)
@@ -544,7 +547,7 @@ class CoilTorsion(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -554,8 +557,9 @@ class CoilTorsion(_CoilObjective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -576,7 +580,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil torsion",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -679,7 +683,7 @@ class CoilCurrentLength(CoilLength):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -689,8 +693,9 @@ class CoilCurrentLength(CoilLength):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -711,7 +716,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil current length",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -822,7 +827,7 @@ class CoilSetMinDistance(_Objective):
         If a list, must have the same structure as coils.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -832,8 +837,9 @@ class CoilSetMinDistance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -854,7 +860,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil-coil minimum distance",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         from desc.coils import CoilSet
 
@@ -1011,7 +1017,7 @@ class PlasmaCoilSetMinDistance(_Objective):
         False by default, so that self.things = [coil, eq].
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1021,8 +1027,9 @@ class PlasmaCoilSetMinDistance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -1047,7 +1054,7 @@ def __init__(
         eq_fixed=False,
         coils_fixed=False,
         name="plasma-coil minimum distance",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1256,7 +1263,7 @@ class QuadraticFlux(_Objective):
         plasma currents) is set to zero.
     name : str
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1266,8 +1273,9 @@ class QuadraticFlux(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
     """
 
@@ -1291,7 +1299,7 @@ def __init__(
         field_grid=None,
         vacuum=False,
         name="Quadratic flux",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -1480,7 +1488,7 @@ class ToroidalFlux(_Objective):
         zeta=jnp.array(0.0), NFP=eq.NFP).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1490,8 +1498,9 @@ class ToroidalFlux(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
 
@@ -1515,7 +1524,7 @@ def __init__(
         field_grid=None,
         eval_grid=None,
         name="toroidal-flux",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = eq.Psi
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index f9fcdc7f86..963bb68418 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -63,7 +63,7 @@ class ForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -73,8 +73,9 @@ class ForceBalance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
     """
 
@@ -95,7 +96,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -249,7 +250,7 @@ class ForceBalanceAnisotropic(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -259,8 +260,9 @@ class ForceBalanceAnisotropic(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -282,7 +284,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force-anisotropic",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -422,7 +424,7 @@ class RadialForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -432,8 +434,9 @@ class RadialForceBalance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -455,7 +458,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="radial force",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -595,7 +598,7 @@ class HelicalForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -605,8 +608,9 @@ class HelicalForceBalance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -628,7 +632,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="helical force",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -766,7 +770,7 @@ class Energy(_Objective):
         Adiabatic (compressional) index. Default = 0.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -776,8 +780,9 @@ class Energy(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -802,7 +807,7 @@ def __init__(
         grid=None,
         gamma=0,
         name="energy",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -948,7 +953,7 @@ class CurrentDensity(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -958,8 +963,9 @@ class CurrentDensity(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -981,7 +987,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="current density",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py
index f0240ec408..9d475eebe8 100644
--- a/desc/objectives/_free_boundary.py
+++ b/desc/objectives/_free_boundary.py
@@ -73,7 +73,7 @@ class VacuumBoundaryError(_Objective):
         be fixed. For single stage optimization, should be False (default).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -83,8 +83,9 @@ class VacuumBoundaryError(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -110,7 +111,7 @@ def __init__(
         field_grid=None,
         field_fixed=False,
         name="Vacuum boundary error",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -425,7 +426,7 @@ class BoundaryError(_Objective):
         less memory.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -435,8 +436,9 @@ class BoundaryError(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
 
@@ -483,7 +485,7 @@ def __init__(
         field_fixed=False,
         loop=True,
         name="Boundary error",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -897,7 +899,7 @@ class BoundaryErrorNESTOR(_Objective):
         reverse mode and forward over reverse mode respectively.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -907,8 +909,9 @@ class BoundaryErrorNESTOR(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -936,7 +939,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name="NESTOR Boundary",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py
index 554b06fc6b..49c3d86737 100644
--- a/desc/objectives/_generic.py
+++ b/desc/objectives/_generic.py
@@ -57,7 +57,7 @@ class GenericObjective(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -67,8 +67,9 @@ class GenericObjective(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -88,7 +89,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="generic",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
         **kwargs,
     ):
         errorif(
@@ -240,7 +241,7 @@ def __init__(
         normalize=False,
         normalize_target=False,
         name="custom linear",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -359,7 +360,7 @@ class ObjectiveFromUser(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -369,8 +370,9 @@ class ObjectiveFromUser(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     Examples
@@ -408,7 +410,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="custom",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
         **kwargs,
     ):
         errorif(
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index 2ea7a7b6b2..8eb5357469 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -53,7 +53,7 @@ class AspectRatio(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -63,8 +63,9 @@ class AspectRatio(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -85,7 +86,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="aspect ratio",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 2
@@ -230,7 +231,7 @@ class Elongation(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -240,8 +241,9 @@ class Elongation(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -262,7 +264,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="elongation",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 1
@@ -406,7 +408,7 @@ class Volume(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -416,8 +418,9 @@ class Volume(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -438,7 +441,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="volume",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 1
@@ -631,7 +634,7 @@ class PlasmaVesselDistance(_Objective):
         more accurate approximation of the true min.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -641,8 +644,9 @@ class PlasmaVesselDistance(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
     """
 
@@ -669,7 +673,7 @@ def __init__(
         softmin_alpha=1.0,
         name="plasma-vessel distance",
         use_signed_distance=False,
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
         **kwargs,
     ):
         if target is None and bounds is None:
@@ -984,7 +988,7 @@ class MeanCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -994,8 +998,9 @@ class MeanCurvature(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -1016,7 +1021,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="mean curvature",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -1159,7 +1164,7 @@ class PrincipalCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1169,8 +1174,9 @@ class PrincipalCurvature(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -1191,7 +1197,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="principal-curvature",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 1
@@ -1329,7 +1335,7 @@ class BScaleLength(_Objective):
         ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1339,8 +1345,9 @@ class BScaleLength(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -1361,7 +1368,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="B-scale-length",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1495,7 +1502,7 @@ class GoodCoordinates(_Objective):
         Collocation grid containing the nodes to evaluate at.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1505,8 +1512,9 @@ class GoodCoordinates(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -1528,7 +1536,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coordinate goodness",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py
index 0a90c719b0..648d6aacdc 100644
--- a/desc/objectives/_omnigenity.py
+++ b/desc/objectives/_omnigenity.py
@@ -57,7 +57,7 @@ class QuasisymmetryBoozer(_Objective):
         Toroidal resolution of Boozer transformation. Default = 2 * eq.N.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -67,8 +67,9 @@ class QuasisymmetryBoozer(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -91,7 +92,7 @@ def __init__(
         M_booz=None,
         N_booz=None,
         name="QS Boozer",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -284,7 +285,7 @@ class QuasisymmetryTwoTerm(_Objective):
         Type of quasi-symmetry (M, N).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -294,8 +295,9 @@ class QuasisymmetryTwoTerm(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -317,7 +319,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="QS two-term",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -484,7 +486,7 @@ class QuasisymmetryTripleProduct(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -494,8 +496,9 @@ class QuasisymmetryTripleProduct(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -516,7 +519,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="QS triple product",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -677,7 +680,7 @@ class Omnigenity(_Objective):
         associated data are re-computed at every iteration (Default).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -687,8 +690,9 @@ class Omnigenity(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -716,7 +720,7 @@ def __init__(
         eq_fixed=False,
         field_fixed=False,
         name="omnigenity",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -1027,7 +1031,7 @@ class Isodynamicity(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1037,8 +1041,9 @@ class Isodynamicity(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -1059,7 +1064,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Isodynamicity",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_power_balance.py b/desc/objectives/_power_balance.py
index 459b0c4912..16a4f67e82 100644
--- a/desc/objectives/_power_balance.py
+++ b/desc/objectives/_power_balance.py
@@ -56,7 +56,7 @@ class FusionPower(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -66,8 +66,9 @@ class FusionPower(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
     """
 
@@ -88,7 +89,7 @@ def __init__(
         fuel="DT",
         grid=None,
         name="fusion power",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         errorif(
             fuel not in ["DT"], ValueError, f"fuel must be one of ['DT'], got {fuel}."
@@ -255,7 +256,7 @@ class HeatingPowerISS04(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -265,8 +266,9 @@ class HeatingPowerISS04(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
     """
 
@@ -288,7 +290,7 @@ def __init__(
         gamma=0,
         grid=None,
         name="heating power",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py
index 3100b61f02..5756a7d14f 100644
--- a/desc/objectives/_profiles.py
+++ b/desc/objectives/_profiles.py
@@ -53,7 +53,7 @@ class Pressure(_Objective):
         Defaults to ``LinearGrid(L=eq.L_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -63,8 +63,9 @@ class Pressure(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -85,7 +86,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="pressure",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -226,7 +227,7 @@ class RotationalTransform(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -236,8 +237,9 @@ class RotationalTransform(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -258,7 +260,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="rotational transform",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
@@ -412,7 +414,7 @@ class Shear(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -422,8 +424,9 @@ class Shear(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size, i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"``, which will use a conservative
+        size of 1000.
 
 
     """
@@ -444,7 +447,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="shear",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -594,7 +597,7 @@ class ToroidalCurrent(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -604,8 +607,9 @@ class ToroidalCurrent(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -626,7 +630,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="toroidal current",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index 02b8bce259..7f7c986d68 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -63,7 +63,7 @@ class MercierStability(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -73,8 +73,9 @@ class MercierStability(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -95,7 +96,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Mercier Stability",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -259,7 +260,7 @@ class MagneticWell(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -269,8 +270,9 @@ class MagneticWell(_Objective):
         ``jac_chunk_size``, the faster the calculation takes, at the cost of
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
-        If None, it will default to a conservative default
-        `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
 
     """
@@ -291,7 +293,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Magnetic Well",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index 3fe98abaf1..9ab90a9c79 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -44,7 +44,7 @@
 }
 
 
-def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size=None):
+def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size="auto"):
     """Get the objective function for a typical force balance equilibrium problem.
 
     Parameters
@@ -57,7 +57,7 @@ def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size=N
         for minimizing MHD energy.
     normalize : bool
         Whether to normalize units of objective.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
 
 
     Returns
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 591f3155af..11ed234298 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -15,6 +15,7 @@
     errorif,
     flatten_list,
     is_broadcastable,
+    isposint,
     setdefault,
     unique_list,
     warnif,
@@ -43,16 +44,18 @@ class ObjectiveFunction(IOAble):
         otherwise "blocked".
     name : str
         Name of the objective function.
-    jac_chunk_size : int, optional
+    jac_chunk_size : int or "auto", optional
         If `"batched"` deriv_mode is used, will calculate the Jacobian
         ``jac_chunk_size`` columns at a time, instead of all at once.
-       The memory usage of the Jacobian calculation is roughly
+        The memory usage of the Jacobian calculation is roughly
         ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size,
         the less memory the Jacobian calculation will require (with some baseline
         memory usage). The time it takes to compute the Jacobian is roughly
         ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster
         the calculation takes, at the cost of requiring more memory.
-        If None, it will default to ``np.ceil(dim_x/4)``
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
     """
 
@@ -64,7 +67,7 @@ def __init__(
         use_jit=True,
         deriv_mode="auto",
         name="ObjectiveFunction",
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if not isinstance(objectives, (tuple, list)):
             objectives = (objectives,)
@@ -83,6 +86,7 @@ def __init__(
             deriv_mode = "batched"
             jac_chunk_size = 1
         assert deriv_mode in {"auto", "batched", "blocked"}
+        assert jac_chunk_size in ["auto", None] or isposint(jac_chunk_size)
 
         self._jac_chunk_size = jac_chunk_size
         self._objectives = objectives
@@ -158,9 +162,11 @@ def build(self, use_jit=None, verbose=1):
             self._scalar = False
 
         self._set_derivatives()
-        sub_obj_jac_chunk_sizes = [obj._jac_chunk_size for obj in self.objectives]
+        sub_obj_jac_chunk_sizes_are_ints = [
+            isposint(obj._jac_chunk_size) for obj in self.objectives
+        ]
         warnif(
-            np.any(sub_obj_jac_chunk_sizes) and self._deriv_mode != "blocked",
+            any(sub_obj_jac_chunk_sizes_are_ints) and self._deriv_mode != "blocked",
             UserWarning,
             "'jac_chunk_size' was passed into one or more sub-objectives, but the"
             " ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
@@ -170,7 +176,8 @@ def build(self, use_jit=None, verbose=1):
             "different 'jac_chunk_size' for its Jacobian computation.",
         )
         warnif(
-            self._jac_chunk_size is not None and self._deriv_mode == "blocked",
+            self._jac_chunk_size not in ["auto", None]
+            and self._deriv_mode == "blocked",
             UserWarning,
             "'jac_chunk_size' was passed into ObjectiveFunction, but the "
             "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective "
@@ -182,19 +189,17 @@ def build(self, use_jit=None, verbose=1):
             self._unjit()
 
         self._set_things()
-        if self._jac_chunk_size is None and self._deriv_mode == "batched":
-            # set jac_chunk_size to 1/4 of number columns of Jacobian
+        if self._jac_chunk_size == "auto" and self._deriv_mode == "batched":
+            # set jac_chunk_size to 1000 columns of Jacobian
             # as the default for batched deriv_mode
-            self._jac_chunk_size = int(np.ceil(self.dim_x / 4))
+            self._jac_chunk_size = 1000
         if self._deriv_mode == "blocked":
             # set jac_chunk_size for each sub-objective
-            # to 1/4 of number columns of Jacobian
+            # to 1000 columns of Jacobian
             # as the default for batched deriv_mode
             for obj in self.objectives:
                 obj._jac_chunk_size = (
-                    int(np.ceil(sum(t.dim_x for t in obj.things) / 4))
-                    if obj._jac_chunk_size is None
-                    else obj._jac_chunk_size
+                    1000 if obj._jac_chunk_size == "auto" else obj._jac_chunk_size
                 )
 
         self._built = True
@@ -881,6 +886,18 @@ class _Objective(IOAble, ABC):
         reverse mode and forward over reverse mode respectively.
     name : str, optional
         Name of the objective.
+    jac_chunk_size : int or "auto", optional
+        Will calculate the Jacobian
+        ``jac_chunk_size`` columns at a time, instead of all at once.
+        The memory usage of the Jacobian calculation is roughly
+        ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size,
+        the less memory the Jacobian calculation will require (with some baseline
+        memory usage). The time it takes to compute the Jacobian is roughly
+        ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster
+        the calculation takes, at the cost of requiring more memory.
+        If None, it will use the largest size i.e ``obj.dim_x``.
+        Defaults to ``chunk_size="auto"`` which will use a conservative
+        size of 1000.
 
     """
 
@@ -911,7 +928,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name=None,
-        jac_chunk_size=None,
+        jac_chunk_size="auto",
     ):
         if self._scalar:
             assert self._coordinates == ""
@@ -922,6 +939,8 @@ def __init__(
         assert (bounds is None) or (target is None), "Cannot use both bounds and target"
         assert loss_function in [None, "mean", "min", "max"]
         assert deriv_mode in {"auto", "fwd", "rev"}
+        assert jac_chunk_size in ["auto", None] or isposint(jac_chunk_size)
+
         self._jac_chunk_size = jac_chunk_size
 
         self._target = target
diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index 2b2cfd196c..ec5c649d3a 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -70,7 +70,7 @@ A full example objective with comments describing the key points is given below:
             Collocation grid containing the nodes to evaluate at.
         name : str, optional
             Name of the objective function.
-        jac_chunk_size : int, optional
+        jac_chunk_size : int or "auto", optional
             Will calculate the Jacobian for this objective ``jac_chunk_size``
             columns at a time, instead of all at once. The memory usage of the
             Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -100,7 +100,7 @@ A full example objective with comments describing the key points is given below:
             normalize_target=True,
             grid=None,
             name="QS triple product",
-            jac_chunk_size=None,
+            jac_chunk_size="auto",
         ):
             # we don't have to do much here, mostly just call ``super().__init__()``
             if target is None and bounds is None:
diff --git a/docs/memory_usage.rst b/docs/memory_usage.rst
index f2fdc7adad..390416a645 100644
--- a/docs/memory_usage.rst
+++ b/docs/memory_usage.rst
@@ -15,7 +15,7 @@ in a slightly slower, but more memory-efficient manner. The memory usage of the
 will require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t=t0 +t1/jac_chunk_size``
 with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
 at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,
-but slowest method of calculating the Jacobian. If ``jac_chunk_size=None``, it will default to ``obj.dim_x/4``.
+but slowest method of calculating the Jacobian. If ``jac_chunk_size="auto"``, it will default to a conservative chunk size of ``1000``.
 
 If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
 be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 53ac2b63b9..986ed5bfbf 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1286,6 +1286,7 @@ def test_derivative_modes():
         [
             PlasmaVesselDistance(eq, surf, jac_chunk_size=1),
             MagneticWell(eq),
+            AspectRatio(eq),
         ],
         deriv_mode="batched",
         use_jit=False,
@@ -1294,6 +1295,7 @@ def test_derivative_modes():
         [
             PlasmaVesselDistance(eq, surf, jac_chunk_size=2),
             MagneticWell(eq),
+            AspectRatio(eq, jac_chunk_size=None),
         ],
         deriv_mode="blocked",
         jac_chunk_size=10,
@@ -1304,6 +1306,7 @@ def test_derivative_modes():
             [
                 PlasmaVesselDistance(eq, surf),
                 MagneticWell(eq),
+                AspectRatio(eq),
             ],
             deriv_mode="looped",
             use_jit=False,
@@ -1313,9 +1316,9 @@ def test_derivative_modes():
     with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj2.build()
     # check that default size works for blocked
-    assert obj2.objectives[1]._jac_chunk_size == np.ceil(
-        sum(t.dim_x for t in obj2.objectives[1].things) / 4
-    )
+    assert obj2.objectives[1]._jac_chunk_size == 1000
+    assert obj2.objectives[2]._jac_chunk_size is None
+    assert obj1._jac_chunk_size == 1000
     obj3.build()
     x = obj1.x(eq, surf)
     g1 = obj1.grad(x)

From 2df20b5ecbbed89e568e7f80e66521b18b23f234 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Sat, 14 Sep 2024 11:32:59 +0900
Subject: [PATCH 33/46] fix test

---
 desc/objectives/objective_funs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 11ed234298..d44e3191d9 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -189,7 +189,7 @@ def build(self, use_jit=None, verbose=1):
             self._unjit()
 
         self._set_things()
-        if self._jac_chunk_size == "auto" and self._deriv_mode == "batched":
+        if self._jac_chunk_size == "auto":
             # set jac_chunk_size to 1000 columns of Jacobian
             # as the default for batched deriv_mode
             self._jac_chunk_size = 1000
@@ -201,6 +201,8 @@ def build(self, use_jit=None, verbose=1):
                 obj._jac_chunk_size = (
                     1000 if obj._jac_chunk_size == "auto" else obj._jac_chunk_size
                 )
+                print(obj._jac_chunk_size)
+        print(self._jac_chunk_size)
 
         self._built = True
         timer.stop("Objective build")

From e433704901f612df6a591cf873ff8dc8bef9d2e2 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Sat, 14 Sep 2024 17:53:43 +0900
Subject: [PATCH 34/46] remove errant prints

---
 desc/objectives/objective_funs.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index d44e3191d9..befa6bf463 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -201,8 +201,6 @@ def build(self, use_jit=None, verbose=1):
                 obj._jac_chunk_size = (
                     1000 if obj._jac_chunk_size == "auto" else obj._jac_chunk_size
                 )
-                print(obj._jac_chunk_size)
-        print(self._jac_chunk_size)
 
         self._built = True
         timer.stop("Objective build")

From 4137d369a93887eccf10defc8be1852ad1b330f7 Mon Sep 17 00:00:00 2001
From: "Dario G. Panici" <dpanici@princeton.edu>
Date: Sat, 21 Sep 2024 17:02:16 -0400
Subject: [PATCH 35/46] change auto to base chunk size off of device memory and
 jacobian size

---
 desc/objectives/objective_funs.py | 28 +++++++++++++++++++++-------
 tests/test_objective_funs.py      |  1 -
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index befa6bf463..574da48f62 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -5,7 +5,15 @@
 
 import numpy as np
 
-from desc.backend import execute_on_cpu, jit, jnp, tree_flatten, tree_unflatten, use_jax
+from desc.backend import (
+    desc_config,
+    execute_on_cpu,
+    jit,
+    jnp,
+    tree_flatten,
+    tree_unflatten,
+    use_jax,
+)
 from desc.derivatives import Derivative
 from desc.io import IOAble
 from desc.optimizable import Optimizable
@@ -55,7 +63,7 @@ class ObjectiveFunction(IOAble):
         the calculation takes, at the cost of requiring more memory.
         If None, it will use the largest size i.e ``obj.dim_x``.
         Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
+        chunk size based off of a heuristic estimate of the memory usage.
 
     """
 
@@ -189,10 +197,18 @@ def build(self, use_jit=None, verbose=1):
             self._unjit()
 
         self._set_things()
+        self._built = True
+
         if self._jac_chunk_size == "auto":
-            # set jac_chunk_size to 1000 columns of Jacobian
-            # as the default for batched deriv_mode
-            self._jac_chunk_size = 1000
+            # Heuristic estimates of fwd mode Jacobian memory usage,
+            # slightly conservative, based on using ForceBalance as the objective
+            estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1  # in GB
+            max_chunk_size = round(
+                (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)
+                / 0.85
+                * self.dim_x
+            )
+            self._jac_chunk_size = max([1, max_chunk_size])
         if self._deriv_mode == "blocked":
             # set jac_chunk_size for each sub-objective
             # to 1000 columns of Jacobian
@@ -201,8 +217,6 @@ def build(self, use_jit=None, verbose=1):
                 obj._jac_chunk_size = (
                     1000 if obj._jac_chunk_size == "auto" else obj._jac_chunk_size
                 )
-
-        self._built = True
         timer.stop("Objective build")
         if verbose > 1:
             timer.disp("Objective build")
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 986ed5bfbf..29c5ae8b2f 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1318,7 +1318,6 @@ def test_derivative_modes():
     # check that default size works for blocked
     assert obj2.objectives[1]._jac_chunk_size == 1000
     assert obj2.objectives[2]._jac_chunk_size is None
-    assert obj1._jac_chunk_size == 1000
     obj3.build()
     x = obj1.x(eq, surf)
     g1 = obj1.grad(x)

From bd345344cce05312a4b36a16455847ead55d0649 Mon Sep 17 00:00:00 2001
From: "Dario G. Panici" <dpanici@princeton.edu>
Date: Sat, 21 Sep 2024 17:13:11 -0400
Subject: [PATCH 36/46] update test

---
 tests/test_objective_funs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 29c5ae8b2f..f985290f40 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1318,6 +1318,8 @@ def test_derivative_modes():
     # check that default size works for blocked
     assert obj2.objectives[1]._jac_chunk_size == 1000
     assert obj2.objectives[2]._jac_chunk_size is None
+    # hard to say what size auto will give, just check it is >0
+    assert obj1._jac_chunk_size > 0
     obj3.build()
     x = obj1.x(eq, surf)
     g1 = obj1.grad(x)

From 8b2645dc95277ed69bbd0900bcbade9da16cfb01 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Sun, 22 Sep 2024 19:37:39 -0400
Subject: [PATCH 37/46] update changelog

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c165fa562..a82e504bb4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,12 @@ New Features
 - Changes ``ToroidalFlux`` objective to default using a 1D loop integral of the vector potential
 to compute the toroidal flux when possible, as opposed to a 2D surface integral of the magnetic field dotted with ``n_zeta``.
 - Allow specification of Nyquist spectrum maximum modenumbers when using ``VMECIO.save`` to save a DESC .h5 file as a VMEC-format wout file
+- Add ``jac_chunk_size`` to ``ObjectiveFunction`` and ``_Objective`` to control the above chunk size for the ``fwd`` mode Jacobian calculation
+  - if ``None``, the chunk size is equal to ``dim_x``, so no chunking is done
+  - if an ``int``, this is the chunk size to be used.
+  - if ``"auto"`` for the ``ObjectiveFunction``, will use a heuristic for the maximum ``jac_chunk_size`` that fits the Jacobian calculation in the available device memory, according to the formula: ``max_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)  / 0.85  * self.dim_x``
+  - if ``"auto"`` for an ``_Objective``, will use a conservative size of ``1000``
+- the ``ObjectiveFunction`` ``jac_chunk_size`` is used if ``deriv_mode="batched"``, and the ``_Objective`` ``jac_chunk_size`` will be used if ``deriv_mode="blocked"``
 
 Bug Fixes
 

From 3e8e36457a7582ec94f6805f6262abb2f9a670f1 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:02:31 -0400
Subject: [PATCH 38/46] remove auto option for _Objective chunk size

---
 desc/objectives/_bootstrap.py     |  6 +--
 desc/objectives/_coils.py         | 62 +++++++++----------------------
 desc/objectives/_equilibrium.py   | 41 ++++++--------------
 desc/objectives/_free_boundary.py | 21 +++--------
 desc/objectives/_generic.py       | 16 +++-----
 desc/objectives/_geometry.py      | 55 ++++++++-------------------
 desc/objectives/_omnigenity.py    | 35 +++++------------
 desc/objectives/_power_balance.py | 12 ++----
 desc/objectives/_profiles.py      | 28 ++++----------
 desc/objectives/_stability.py     | 14 ++-----
 desc/objectives/getters.py        | 11 ++++++
 desc/objectives/objective_funs.py | 16 ++------
 desc/utils_batched_vectorize.py   | 47 -----------------------
 docs/adding_objectives.rst        |  5 +--
 docs/memory_usage.rst             |  5 ++-
 tests/test_objective_funs.py      |  2 +-
 16 files changed, 106 insertions(+), 270 deletions(-)

diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py
index 7c73b10227..c76a003b28 100644
--- a/desc/objectives/_bootstrap.py
+++ b/desc/objectives/_bootstrap.py
@@ -66,7 +66,7 @@ class BootstrapRedlConsistency(_Objective):
         or quasi-axisymmetry; set to +/-NFP for quasi-helical symmetry.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -77,8 +77,6 @@ class BootstrapRedlConsistency(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -99,7 +97,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="Bootstrap current self-consistency (Redl)",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py
index 0fdeeefc9a..40c53bd851 100644
--- a/desc/objectives/_coils.py
+++ b/desc/objectives/_coils.py
@@ -59,7 +59,7 @@ class _CoilObjective(_Objective):
         If a list, must have the same structure as coil.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -70,9 +70,6 @@ class _CoilObjective(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -89,7 +86,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name=None,
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         self._grid = grid
         self._data_keys = data_keys
@@ -270,7 +267,7 @@ class CoilLength(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -281,9 +278,6 @@ class CoilLength(_CoilObjective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -303,7 +297,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil length",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2 * np.pi
@@ -411,7 +405,7 @@ class CoilCurvature(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -422,9 +416,6 @@ class CoilCurvature(_CoilObjective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -444,7 +435,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil curvature",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, 1)
@@ -547,7 +538,7 @@ class CoilTorsion(_CoilObjective):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -558,9 +549,6 @@ class CoilTorsion(_CoilObjective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -580,7 +568,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil torsion",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -683,7 +671,7 @@ class CoilCurrentLength(CoilLength):
         Defaults to ``LinearGrid(N=2 * coil.N + 5)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -694,9 +682,6 @@ class CoilCurrentLength(CoilLength):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -716,7 +701,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil current length",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -827,7 +812,7 @@ class CoilSetMinDistance(_Objective):
         If a list, must have the same structure as coils.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int , optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -838,9 +823,6 @@ class CoilSetMinDistance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -860,7 +842,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coil-coil minimum distance",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         from desc.coils import CoilSet
 
@@ -1017,7 +999,7 @@ class PlasmaCoilSetMinDistance(_Objective):
         False by default, so that self.things = [coil, eq].
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1028,9 +1010,6 @@ class PlasmaCoilSetMinDistance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1054,7 +1033,7 @@ def __init__(
         eq_fixed=False,
         coils_fixed=False,
         name="plasma-coil minimum distance",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1263,7 +1242,7 @@ class QuadraticFlux(_Objective):
         plasma currents) is set to zero.
     name : str
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1274,8 +1253,6 @@ class QuadraticFlux(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -1299,7 +1276,7 @@ def __init__(
         field_grid=None,
         vacuum=False,
         name="Quadratic flux",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1488,7 +1465,7 @@ class ToroidalFlux(_Objective):
         zeta=jnp.array(0.0), NFP=eq.NFP).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1499,9 +1476,6 @@ class ToroidalFlux(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
 
     """
@@ -1524,7 +1498,7 @@ def __init__(
         field_grid=None,
         eval_grid=None,
         name="toroidal-flux",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = eq.Psi
diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py
index 963bb68418..0124a84b11 100644
--- a/desc/objectives/_equilibrium.py
+++ b/desc/objectives/_equilibrium.py
@@ -63,7 +63,7 @@ class ForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -74,8 +74,6 @@ class ForceBalance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -96,7 +94,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -250,7 +248,7 @@ class ForceBalanceAnisotropic(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -261,9 +259,6 @@ class ForceBalanceAnisotropic(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -284,7 +279,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="force-anisotropic",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -424,7 +419,7 @@ class RadialForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -435,9 +430,6 @@ class RadialForceBalance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -458,7 +450,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="radial force",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -598,7 +590,7 @@ class HelicalForceBalance(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -609,9 +601,6 @@ class HelicalForceBalance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -632,7 +621,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="helical force",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -770,7 +759,7 @@ class Energy(_Objective):
         Adiabatic (compressional) index. Default = 0.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -781,9 +770,6 @@ class Energy(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -807,7 +793,7 @@ def __init__(
         grid=None,
         gamma=0,
         name="energy",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -953,7 +939,7 @@ class CurrentDensity(_Objective):
         Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)``
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -964,9 +950,6 @@ class CurrentDensity(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -987,7 +970,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="current density",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py
index 9d475eebe8..cd75899f41 100644
--- a/desc/objectives/_free_boundary.py
+++ b/desc/objectives/_free_boundary.py
@@ -73,7 +73,7 @@ class VacuumBoundaryError(_Objective):
         be fixed. For single stage optimization, should be False (default).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -84,9 +84,6 @@ class VacuumBoundaryError(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -111,7 +108,7 @@ def __init__(
         field_grid=None,
         field_fixed=False,
         name="Vacuum boundary error",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -426,7 +423,7 @@ class BoundaryError(_Objective):
         less memory.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -437,9 +434,6 @@ class BoundaryError(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
 
     Examples
@@ -485,7 +479,7 @@ def __init__(
         field_fixed=False,
         loop=True,
         name="Boundary error",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -899,7 +893,7 @@ class BoundaryErrorNESTOR(_Objective):
         reverse mode and forward over reverse mode respectively.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -910,9 +904,6 @@ class BoundaryErrorNESTOR(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -939,7 +930,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name="NESTOR Boundary",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py
index 49c3d86737..b2e568fd20 100644
--- a/desc/objectives/_generic.py
+++ b/desc/objectives/_generic.py
@@ -57,7 +57,7 @@ class GenericObjective(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -68,9 +68,6 @@ class GenericObjective(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -89,7 +86,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="generic",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
         **kwargs,
     ):
         errorif(
@@ -241,7 +238,7 @@ def __init__(
         normalize=False,
         normalize_target=False,
         name="custom linear",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -360,7 +357,7 @@ class ObjectiveFromUser(_Objective):
         ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -371,9 +368,6 @@ class ObjectiveFromUser(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     Examples
     --------
@@ -410,7 +404,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="custom",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
         **kwargs,
     ):
         errorif(
diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py
index 8eb5357469..6c8c1376da 100644
--- a/desc/objectives/_geometry.py
+++ b/desc/objectives/_geometry.py
@@ -53,7 +53,7 @@ class AspectRatio(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -64,9 +64,6 @@ class AspectRatio(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -86,7 +83,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="aspect ratio",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 2
@@ -231,7 +228,7 @@ class Elongation(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -242,9 +239,6 @@ class Elongation(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -264,7 +258,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="elongation",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -408,7 +402,7 @@ class Volume(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -419,9 +413,6 @@ class Volume(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -441,7 +432,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="volume",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -634,7 +625,7 @@ class PlasmaVesselDistance(_Objective):
         more accurate approximation of the true min.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -645,8 +636,6 @@ class PlasmaVesselDistance(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -673,7 +662,7 @@ def __init__(
         softmin_alpha=1.0,
         name="plasma-vessel distance",
         use_signed_distance=False,
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
         **kwargs,
     ):
         if target is None and bounds is None:
@@ -988,7 +977,7 @@ class MeanCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -999,9 +988,6 @@ class MeanCurvature(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1021,7 +1007,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="mean curvature",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -1164,7 +1150,7 @@ class PrincipalCurvature(_Objective):
         or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1175,9 +1161,6 @@ class PrincipalCurvature(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1197,7 +1180,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="principal-curvature",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 1
@@ -1335,7 +1318,7 @@ class BScaleLength(_Objective):
         ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1346,9 +1329,6 @@ class BScaleLength(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1368,7 +1348,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="B-scale-length",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (1, np.inf)
@@ -1502,7 +1482,7 @@ class GoodCoordinates(_Objective):
         Collocation grid containing the nodes to evaluate at.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1513,9 +1493,6 @@ class GoodCoordinates(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1536,7 +1513,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="coordinate goodness",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py
index 648d6aacdc..415716c2eb 100644
--- a/desc/objectives/_omnigenity.py
+++ b/desc/objectives/_omnigenity.py
@@ -57,7 +57,7 @@ class QuasisymmetryBoozer(_Objective):
         Toroidal resolution of Boozer transformation. Default = 2 * eq.N.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -68,9 +68,6 @@ class QuasisymmetryBoozer(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -92,7 +89,7 @@ def __init__(
         M_booz=None,
         N_booz=None,
         name="QS Boozer",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -285,7 +282,7 @@ class QuasisymmetryTwoTerm(_Objective):
         Type of quasi-symmetry (M, N).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -296,9 +293,6 @@ class QuasisymmetryTwoTerm(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -319,7 +313,7 @@ def __init__(
         grid=None,
         helicity=(1, 0),
         name="QS two-term",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -486,7 +480,7 @@ class QuasisymmetryTripleProduct(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -497,9 +491,6 @@ class QuasisymmetryTripleProduct(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -519,7 +510,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="QS triple product",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -680,7 +671,7 @@ class Omnigenity(_Objective):
         associated data are re-computed at every iteration (Default).
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -691,9 +682,6 @@ class Omnigenity(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -720,7 +708,7 @@ def __init__(
         eq_fixed=False,
         field_fixed=False,
         name="omnigenity",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -1031,7 +1019,7 @@ class Isodynamicity(_Objective):
         Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -1042,9 +1030,6 @@ class Isodynamicity(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -1064,7 +1049,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Isodynamicity",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_power_balance.py b/desc/objectives/_power_balance.py
index 16a4f67e82..313aa87344 100644
--- a/desc/objectives/_power_balance.py
+++ b/desc/objectives/_power_balance.py
@@ -56,7 +56,7 @@ class FusionPower(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -67,8 +67,6 @@ class FusionPower(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -89,7 +87,7 @@ def __init__(
         fuel="DT",
         grid=None,
         name="fusion power",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         errorif(
             fuel not in ["DT"], ValueError, f"fuel must be one of ['DT'], got {fuel}."
@@ -256,7 +254,7 @@ class HeatingPowerISS04(_Objective):
         Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -267,8 +265,6 @@ class HeatingPowerISS04(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
 
     """
 
@@ -290,7 +286,7 @@ def __init__(
         gamma=0,
         grid=None,
         name="heating power",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py
index 5756a7d14f..1103421d42 100644
--- a/desc/objectives/_profiles.py
+++ b/desc/objectives/_profiles.py
@@ -53,7 +53,7 @@ class Pressure(_Objective):
         Defaults to ``LinearGrid(L=eq.L_grid)``.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -64,9 +64,6 @@ class Pressure(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -86,7 +83,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="pressure",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -227,7 +224,7 @@ class RotationalTransform(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -238,9 +235,6 @@ class RotationalTransform(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -260,7 +254,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="rotational transform",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
@@ -414,7 +408,7 @@ class Shear(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -425,9 +419,6 @@ class Shear(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -447,7 +438,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="shear",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (-np.inf, 0)
@@ -597,7 +588,7 @@ class ToroidalCurrent(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -608,9 +599,6 @@ class ToroidalCurrent(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -630,7 +618,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="toroidal current",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             target = 0
diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py
index 7f7c986d68..8229eddda2 100644
--- a/desc/objectives/_stability.py
+++ b/desc/objectives/_stability.py
@@ -63,7 +63,7 @@ class MercierStability(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -74,9 +74,6 @@ class MercierStability(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -96,7 +93,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Mercier Stability",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
@@ -260,7 +257,7 @@ class MagneticWell(_Objective):
         are required.
     name : str, optional
         Name of the objective function.
-    jac_chunk_size : int or "auto", optional
+    jac_chunk_size : int, optional
         Will calculate the Jacobian for this objective ``jac_chunk_size``
         columns at a time, instead of all at once. The memory usage of the
         Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``:
@@ -271,9 +268,6 @@ class MagneticWell(_Objective):
         requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
         memory intensive, but slowest method of calculating the Jacobian.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
-
 
     """
 
@@ -293,7 +287,7 @@ def __init__(
         deriv_mode="auto",
         grid=None,
         name="Magnetic Well",
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if target is None and bounds is None:
             bounds = (0, np.inf)
diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py
index 9ab90a9c79..727c6f1835 100644
--- a/desc/objectives/getters.py
+++ b/desc/objectives/getters.py
@@ -58,6 +58,17 @@ def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size="
     normalize : bool
         Whether to normalize units of objective.
     jac_chunk_size : int or "auto", optional
+        If ``"batched"`` deriv_mode is used, will calculate the Jacobian
+        ``jac_chunk_size`` columns at a time, instead of all at once.
+        The memory usage of the Jacobian calculation is roughly
+        ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size,
+        the less memory the Jacobian calculation will require (with some baseline
+        memory usage). The time it takes to compute the Jacobian is roughly
+        ``t= t0 + t1/jac_chunk_size`` so the larger the ``jac_chunk_size``, the faster
+        the calculation takes, at the cost of requiring more memory.
+        If None, it will use the largest size i.e. ``obj.dim_x``.
+        Defaults to ``jac_chunk_size="auto"`` which will use a conservative
+        chunk size based off of a heuristic estimate of the memory usage.
 
 
     Returns
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 574da48f62..0da1aca3e3 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -209,14 +209,7 @@ def build(self, use_jit=None, verbose=1):
                 * self.dim_x
             )
             self._jac_chunk_size = max([1, max_chunk_size])
-        if self._deriv_mode == "blocked":
-            # set jac_chunk_size for each sub-objective
-            # to 1000 columns of Jacobian
-            # as the default for batched deriv_mode
-            for obj in self.objectives:
-                obj._jac_chunk_size = (
-                    1000 if obj._jac_chunk_size == "auto" else obj._jac_chunk_size
-                )
+
         timer.stop("Objective build")
         if verbose > 1:
             timer.disp("Objective build")
@@ -910,8 +903,7 @@ class _Objective(IOAble, ABC):
         ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster
         the calculation takes, at the cost of requiring more memory.
         If None, it will use the largest size i.e ``obj.dim_x``.
-        Defaults to ``chunk_size="auto"`` which will use a conservative
-        size of 1000.
+        Defaults to ``jac_chunk_size=None``.
 
     """
 
@@ -942,7 +934,7 @@ def __init__(
         loss_function=None,
         deriv_mode="auto",
         name=None,
-        jac_chunk_size="auto",
+        jac_chunk_size=None,
     ):
         if self._scalar:
             assert self._coordinates == ""
@@ -953,7 +945,7 @@ def __init__(
         assert (bounds is None) or (target is None), "Cannot use both bounds and target"
         assert loss_function in [None, "mean", "min", "max"]
         assert deriv_mode in {"auto", "fwd", "rev"}
-        assert jac_chunk_size in ["auto", None] or isposint(jac_chunk_size)
+        assert jac_chunk_size is None or isposint(jac_chunk_size)
 
         self._jac_chunk_size = jac_chunk_size
 
diff --git a/desc/utils_batched_vectorize.py b/desc/utils_batched_vectorize.py
index d6ae9f7806..9b8bf0f411 100644
--- a/desc/utils_batched_vectorize.py
+++ b/desc/utils_batched_vectorize.py
@@ -56,53 +56,6 @@ def _chunk(x, chunk_size=None):
     return x.reshape((n_chunks, chunk_size) + x.shape[1:])
 
 
-def _chunk_size(x):
-    b = set(map(lambda x: x.shape[:2], jax.tree_util.tree_leaves(x)))
-    if len(b) != 1:
-        raise ValueError(
-            "The arrays in x have inconsistent chunk_size or number of chunks"
-        )
-    return b.pop()[1]
-
-
-def unchunk(x_chunked):
-    """Merge the first two axes of an array (or a pytree of arrays).
-
-    Parameters
-    ----------
-    x_chunked: an array (or pytree of arrays) of at least 2 dimensions
-
-    Returns
-    -------
-    (x, chunk_fn) : tuple
-        where x is x_chunked reshaped to (-1,)+x.shape[2:]
-        and chunk_fn is a function which restores x given x_chunked
-
-    """
-    return _unchunk(x_chunked), functools.partial(
-        _chunk, chunk_size=_chunk_size(x_chunked)
-    )
-
-
-def chunk(x, chunk_size=None):
-    """Split an array (or a pytree of arrays) into chunks along the first axis.
-
-    Parameters
-    ----------
-    x: an array (or pytree of arrays)
-    chunk_size: an integer or None (default)
-        The first axis in x must be a multiple of chunk_size
-
-    Returns
-    -------
-    (x_chunked, unchunk_fn): tuple
-        - x_chunked is x reshaped to (-1, chunk_size)+x.shape[1:]
-          if chunk_size is None then it defaults to x.shape[0], i.e. just one chunk
-        - unchunk_fn is a function which restores x given x_chunked
-    """
-    return _chunk(x, chunk_size), _unchunk
-
-
 ####
 
 # The following section of this code is derived from the NetKet project
diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index ec5c649d3a..9204208b54 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -80,8 +80,7 @@ A full example objective with comments describing the key points is given below:
             ``jac_chunk_size``, the faster the calculation takes, at the cost of
             requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least
             memory intensive, but slowest method of calculating the Jacobian.
-            If None, it will default to a conservative default
-            `jac_chunk_size` i.e. ``np.ceil(dim_x/4)``
+            If None, it will use the largest possible size.
 
         """
 
@@ -100,7 +99,7 @@ A full example objective with comments describing the key points is given below:
             normalize_target=True,
             grid=None,
             name="QS triple product",
-            jac_chunk_size="auto",
+            jac_chunk_size=None,
         ):
             # we don't have to do much here, mostly just call ``super().__init__()``
             if target is None and bounds is None:
diff --git a/docs/memory_usage.rst b/docs/memory_usage.rst
index 390416a645..692f2c7ae8 100644
--- a/docs/memory_usage.rst
+++ b/docs/memory_usage.rst
@@ -15,7 +15,8 @@ in a slightly slower, but more memory-efficient manner. The memory usage of the
 will require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t=t0 +t1/jac_chunk_size``
 with some baseline time, so the larger the ``jac_chunk_size``, the faster the calculation takes,
 at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive,
-but slowest method of calculating the Jacobian. If ``jac_chunk_size="auto"``, it will default to ``obj.dim_x/4``.
+but slowest method of calculating the Jacobian. If ``jac_chunk_size="auto"``, it will default to a size
+that should make the calculation fit in memory based on a heuristic estimate of the Jacobian memory usage.
 
 If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will
 be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case
@@ -23,4 +24,4 @@ the ``jac_chunk_size`` of the individual ``_Objective`` objects inside of the ``
 For example, if ``obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)``, ``obj2 = MeanCurvature(eq, jac_chunk_size=2000)``
 and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with a
 ``jac_chunk_size=100`` for the quasisymmetry part and a ``jac_chunk_size=2000`` for the curvature part, then the full Jacobian
-will be formed as a block diagonal matrix with the individual Jacobians of these two objectives.
+will be formed as a blocked matrix with the individual Jacobians of these two objectives.
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index f985290f40..25fe587a09 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1316,7 +1316,7 @@ def test_derivative_modes():
     with pytest.warns(UserWarning, match="jac_chunk_size"):
         obj2.build()
     # check that default size works for blocked
-    assert obj2.objectives[1]._jac_chunk_size == 1000
+    assert obj2.objectives[1]._jac_chunk_size is None
     assert obj2.objectives[2]._jac_chunk_size is None
     # hard to say what size auto will give, just check it is >0
     assert obj1._jac_chunk_size > 0

From b0b78fec31fc986c31b2a6df6ad55d3fc9a75d2e Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:03:53 -0400
Subject: [PATCH 39/46] rename utils file

---
 desc/{utils_batched_vectorize.py => batching.py} | 0
 desc/objectives/objective_funs.py                | 2 +-
 desc/optimize/_constraint_wrappers.py            | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename desc/{utils_batched_vectorize.py => batching.py} (100%)

diff --git a/desc/utils_batched_vectorize.py b/desc/batching.py
similarity index 100%
rename from desc/utils_batched_vectorize.py
rename to desc/batching.py
diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index 0da1aca3e3..a7526fc9c9 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -14,6 +14,7 @@
     tree_unflatten,
     use_jax,
 )
+from desc.batching import batched_vectorize
 from desc.derivatives import Derivative
 from desc.io import IOAble
 from desc.optimizable import Optimizable
@@ -28,7 +29,6 @@
     unique_list,
     warnif,
 )
-from desc.utils_batched_vectorize import batched_vectorize
 
 
 class ObjectiveFunction(IOAble):
diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py
index 2e38700585..604609fc8f 100644
--- a/desc/optimize/_constraint_wrappers.py
+++ b/desc/optimize/_constraint_wrappers.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from desc.backend import jit, jnp
+from desc.batching import batched_vectorize
 from desc.objectives import (
     BoundaryRSelfConsistency,
     BoundaryZSelfConsistency,
@@ -14,7 +15,6 @@
 )
 from desc.objectives.utils import factorize_linear_constraints
 from desc.utils import Timer, errorif, get_instance, setdefault
-from desc.utils_batched_vectorize import batched_vectorize
 
 from .utils import f_where_x
 

From be57bbc21e968c1f1e09b60e3e7b899c4a61d4f2 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:08:28 -0400
Subject: [PATCH 40/46] resolve comments

---
 desc/batching.py | 3 ++-
 desc/utils.py    | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/desc/batching.py b/desc/batching.py
index 9b8bf0f411..57d6ba9d14 100644
--- a/desc/batching.py
+++ b/desc/batching.py
@@ -1,8 +1,9 @@
 """Utility functions for the ``batched_vectorize`` function."""
 
+import functools
 from typing import Callable, Optional
 
-from desc.backend import functools, jax, jnp
+from desc.backend import jax, jnp
 
 if jax.__version_info__ >= (0, 4, 16):
     from jax.extend import linear_util as lu
diff --git a/desc/utils.py b/desc/utils.py
index d1483aa039..203e29d3dc 100644
--- a/desc/utils.py
+++ b/desc/utils.py
@@ -1,5 +1,6 @@
 """Utility functions, independent of the rest of DESC."""
 
+import functools
 import operator
 import warnings
 from itertools import combinations_with_replacement, permutations
@@ -8,7 +9,7 @@
 from scipy.special import factorial
 from termcolor import colored
 
-from desc.backend import flatnonzero, fori_loop, functools, jit, jnp, take
+from desc.backend import flatnonzero, fori_loop, jit, jnp, take
 
 
 class Timer:

From cd865d09db63af2ad15d0bdb2ee431617156c0cc Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:11:00 -0400
Subject: [PATCH 41/46] resolve further comment

---
 desc/objectives/objective_funs.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index a7526fc9c9..a507868f19 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -1,9 +1,11 @@
 """Base classes for objectives."""
 
 import functools
+import warnings
 from abc import ABC, abstractmethod
 
 import numpy as np
+from termcolor import colored
 
 from desc.backend import (
     desc_config,
@@ -83,16 +85,18 @@ def __init__(
             isinstance(obj, _Objective) for obj in objectives
         ), "members of ObjectiveFunction should be instances of _Objective"
         assert use_jit in {True, False}
-        warnif(
-            deriv_mode == "looped",
-            DeprecationWarning,
-            '``deriv_mode="looped"`` is deprecated in favor of'
-            ' ``deriv_mode="batched"`` with ``jac_chunk_size=1``.',
-        )
         if deriv_mode == "looped":
             # overwrite the user inputs if deprecated "looped" was given
             deriv_mode = "batched"
             jac_chunk_size = 1
+            warnings.warn(
+                colored(
+                    '``deriv_mode="looped"`` is deprecated in favor of'
+                    ' ``deriv_mode="batched"`` with ``jac_chunk_size=1``.',
+                    "yellow",
+                ),
+                DeprecationWarning,
+            )
         assert deriv_mode in {"auto", "batched", "blocked"}
         assert jac_chunk_size in ["auto", None] or isposint(jac_chunk_size)
 

From e0e513fbbd1868e3a8d7506806ea0a86c5c0b3d1 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:15:02 -0400
Subject: [PATCH 42/46] change warnings to errors

---
 desc/objectives/objective_funs.py |  9 ++++-----
 tests/test_objective_funs.py      | 24 ++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index a507868f19..a49b3e2398 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -29,7 +29,6 @@
     isposint,
     setdefault,
     unique_list,
-    warnif,
 )
 
 
@@ -177,9 +176,9 @@ def build(self, use_jit=None, verbose=1):
         sub_obj_jac_chunk_sizes_are_ints = [
             isposint(obj._jac_chunk_size) for obj in self.objectives
         ]
-        warnif(
+        errorif(
             any(sub_obj_jac_chunk_sizes_are_ints) and self._deriv_mode != "blocked",
-            UserWarning,
+            ValueError,
             "'jac_chunk_size' was passed into one or more sub-objectives, but the"
             " ObjectiveFunction is  using 'batched' deriv_mode, so sub-objective "
             "'jac_chunk_size' will be ignored in favor of the ObjectiveFunction's "
@@ -187,10 +186,10 @@ def build(self, use_jit=None, verbose=1):
             " Specify 'blocked' deriv_mode if each sub-objective is desired to have a "
             "different 'jac_chunk_size' for its Jacobian computation.",
         )
-        warnif(
+        errorif(
             self._jac_chunk_size not in ["auto", None]
             and self._deriv_mode == "blocked",
-            UserWarning,
+            ValueError,
             "'jac_chunk_size' was passed into ObjectiveFunction, but the "
             "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective "
             "'jac_chunk_size' are used to compute each sub-objective's Jacobian, "
diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py
index 25fe587a09..5a4a3d5529 100644
--- a/tests/test_objective_funs.py
+++ b/tests/test_objective_funs.py
@@ -1311,10 +1311,30 @@ def test_derivative_modes():
             deriv_mode="looped",
             use_jit=False,
         )
-    with pytest.warns(UserWarning, match="jac_chunk_size"):
+    with pytest.raises(ValueError, match="jac_chunk_size"):
         obj1.build()
-    with pytest.warns(UserWarning, match="jac_chunk_size"):
+    with pytest.raises(ValueError, match="jac_chunk_size"):
         obj2.build()
+    obj1 = ObjectiveFunction(
+        [
+            PlasmaVesselDistance(eq, surf),
+            MagneticWell(eq),
+            AspectRatio(eq),
+        ],
+        deriv_mode="batched",
+        use_jit=False,
+    )
+    obj2 = ObjectiveFunction(
+        [
+            PlasmaVesselDistance(eq, surf, jac_chunk_size=2),
+            MagneticWell(eq),
+            AspectRatio(eq, jac_chunk_size=None),
+        ],
+        deriv_mode="blocked",
+        use_jit=False,
+    )
+    obj1.build()
+    obj2.build()
     # check that default size works for blocked
     assert obj2.objectives[1]._jac_chunk_size is None
     assert obj2.objectives[2]._jac_chunk_size is None

From a6e29d61e39cc9efce6e088add371d0ad20907f8 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Tue, 24 Sep 2024 12:17:47 -0400
Subject: [PATCH 43/46] add disclaimer to ObjectiveFunction for auto chunk size
 on HPC CPU

---
 desc/objectives/objective_funs.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py
index a49b3e2398..9a540d0b5b 100644
--- a/desc/objectives/objective_funs.py
+++ b/desc/objectives/objective_funs.py
@@ -65,6 +65,10 @@ class ObjectiveFunction(IOAble):
         If None, it will use the largest size i.e ``obj.dim_x``.
         Defaults to ``chunk_size="auto"`` which will use a conservative
         chunk size based off of a heuristic estimate of the memory usage.
+        NOTE: When running on a CPU (not a GPU) on an HPC cluster, DESC is unable to
+        accurately estimate the available device memory, so the "auto" chunk_size
+        option will yield a larger chunk size than may be needed. It is recommended
+        to manually choose a chunk_size if an OOM error is experienced in this case.
 
     """
 

From 9100e7ca8d4910d60a872b3bda01b0fd1c42e0bb Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Wed, 25 Sep 2024 12:03:30 -0400
Subject: [PATCH 44/46] update changelog

---
 CHANGELOG.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a82e504bb4..757906ea50 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,8 +15,7 @@ to compute the toroidal flux when possible, as opposed to a 2D surface integral
 - Add ``jac_chunk_size`` to ``ObjectiveFunction`` and ``_Objective`` to control the above chunk size for the ``fwd`` mode Jacobian calculation
   - if ``None``, the chunk size is equal to ``dim_x``, so no chunking is done
   - if an ``int``, this is the chunk size to be used.
-  - if `"`auto`"` for the ``ObjectiveFunction``, will use a heuristic for the minimum ``jac_chunk_size`` needed to fit the jacobian calculation on the available device memory, according to the formula: ``min_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)  / 0.85  * self.dim_x``
-  - if ``"auto"`` for an ``_Objective``, will use a conservative size of ``1000``
+  - if ``"auto"`` for the ``ObjectiveFunction``, will use a heuristic for the minimum ``jac_chunk_size`` needed to fit the jacobian calculation on the available device memory, according to the formula: ``min_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)  / 0.85  * self.dim_x`` with ``estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1``
 - the ``ObjectiveFunction`` ``jac_chunk_size`` is used if ``deriv_mode="batched"``, and the ``_Objective`` ``jac_chunk_size`` will be used if ``deriv_mode="blocked"``
 
 Bug Fixes

From 15d95f16081379ddd186266c0ae4012b1eeddaf7 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 26 Sep 2024 08:42:11 -0400
Subject: [PATCH 45/46] correct changelog

---
 CHANGELOG.md     | 2 +-
 desc/batching.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 757906ea50..51d9eee577 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,7 @@ to compute the toroidal flux when possible, as opposed to a 2D surface integral
 - Add ``jac_chunk_size`` to ``ObjectiveFunction`` and ``_Objective`` to control the above chunk size for the ``fwd`` mode Jacobian calculation
   - if ``None``, the chunk size is equal to ``dim_x``, so no chunking is done
   - if an ``int``, this is the chunk size to be used.
-  - if ``"auto"`` for the ``ObjectiveFunction``, will use a heuristic for the minimum ``jac_chunk_size`` needed to fit the jacobian calculation on the available device memory, according to the formula: ``min_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)  / 0.85  * self.dim_x`` with ``estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1``
+  - if ``"auto"`` for the ``ObjectiveFunction``, will use a heuristic for the maximum ``jac_chunk_size`` that fits the Jacobian calculation in the available device memory, according to the formula: ``max_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22)  / 0.85  * self.dim_x`` with ``estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1``
 - the ``ObjectiveFunction`` ``jac_chunk_size`` is used if ``deriv_mode="batched"``, and the ``_Objective`` ``jac_chunk_size`` will be used if ``deriv_mode="blocked"``
 
 Bug Fixes
diff --git a/desc/batching.py b/desc/batching.py
index 57d6ba9d14..7b2a18f7b6 100644
--- a/desc/batching.py
+++ b/desc/batching.py
@@ -216,7 +216,7 @@ def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_siz
 
     Parameters
     ----------
-    pyfunc: function to vectorize.
+    pyfunc: callable, function to vectorize.
     excluded: optional set of integers representing positional arguments for
     which the function will not be vectorized. These will be passed directly
     to ``pyfunc`` unmodified.

From dd63b2bf1daf0c78a9c06d14b21341ea8491d937 Mon Sep 17 00:00:00 2001
From: Dario Panici <dpanici@princeton.edu>
Date: Thu, 26 Sep 2024 08:45:10 -0400
Subject: [PATCH 46/46] fix typo

---
 docs/adding_objectives.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst
index 346ba0db3b..95943f6108 100644
--- a/docs/adding_objectives.rst
+++ b/docs/adding_objectives.rst
@@ -225,7 +225,7 @@ when instantiating an Objective objective to modify the objective cost in order
 the objective to your desired purpose. For example, the DESC ``RotationalTransform``
 objective with ``target=iota_target`` by default forms the residual by taking the target
 and subtracting it from the profile at the points in the grid, resulting in a residual
-of the form :math:`\iota_{err} = \sum_{i} (\iota_i - iota_target)^2`, i.e. the residual
+of the form :math:`\iota_{err} = \sum_{i} (\iota_i - \iota_{target})^2`, i.e. the residual
 is the sum of squared pointwise error between the current rotational transform profile
 and the target passed into the objective. If the desired objective instead is to
 optimize to target an average rotational transform of `iota_target`, we can adapt the