diff --git a/CHANGELOG.md b/CHANGELOG.md index f486c79bd..77497379f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,21 @@ New Features to compute the toroidal flux when possible, as opposed to a 2D surface integral of the magnetic field dotted with ``n_zeta``. - Allow specification of Nyquist spectrum maximum modenumbers when using ``VMECIO.save`` to save a DESC .h5 file as a VMEC-format wout file - Added and tested infinite-n ideal-ballooning stability solver implemented as a part of the BallooningStability Objective. DESC can use reverse-mode AD to now optimize equilibria against infinite-n ideal ballooning modes. +- Add ``jac_chunk_size`` to ``ObjectiveFunction`` and ``_Objective`` to control the above chunk size for the ``fwd`` mode Jacobian calculation + - if ``None``, the chunk size is equal to ``dim_x``, so no chunking is done + - if an ``int``, this is the chunk size to be used. + - if ``"auto"`` for the ``ObjectiveFunction``, will use a heuristic for the maximum ``jac_chunk_size`` needed to fit the jacobian calculation on the available device memory, according to the formula: ``max_jac_chunk_size = (desc_config.get("avail_mem") / estimated_memory_usage - 0.22) / 0.85 * self.dim_x`` with ``estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1`` +- the ``ObjectiveFunction`` ``jac_chunk_size`` is used if ``deriv_mode="batched"``, and the ``_Objective`` ``jac_chunk_size`` will be used if ``deriv_mode="blocked"`` + Bug Fixes - Fixes bugs that occur when saving asymmetric equilibria as wout files - Fixes bug that occurs when using ``VMECIO.plot_vmec_comparison`` to compare to an asymmetric wout file +Deprecations + +- ``deriv_mode="looped"`` in ``ObjectiveFunction`` is deprecated and will be removed in a future version in favored of ``deriv_mode="batched"`` with ``jac_chunk_size=1``, v0.12.1 diff --git a/desc/batching.py b/desc/batching.py new file mode 100644 index 000000000..7b2a18f7b --- /dev/null +++ b/desc/batching.py @@ -0,0 +1,322 @@ +"""Utility functions for the ``batched_vectorize`` function.""" + +import functools +from typing import Callable, Optional + +from desc.backend import jax, jnp + +if jax.__version_info__ >= (0, 4, 16): + from jax.extend import linear_util as lu +else: + from jax import linear_util as lu + +from jax._src.numpy.vectorize import ( + _apply_excluded, + _check_output_dims, + _parse_gufunc_signature, + _parse_input_dimensions, +) + +# The following section of this code is derived from the NetKet project +# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/ +# netket/jax/_chunk_utils.py +# +# The original copyright notice is as follows +# Copyright 2021 The NetKet Authors - All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); + + +def _treeify(f): + def _f(x, *args, **kwargs): + return jax.tree_util.tree_map(lambda y: f(y, *args, **kwargs), x) + + return _f + + +@_treeify +def _unchunk(x): + return x.reshape((-1,) + x.shape[2:]) + + +@_treeify +def _chunk(x, chunk_size=None): + # chunk_size=None -> add just a dummy chunk dimension, + # same as np.expand_dims(x, 0) + if x.ndim == 0: + raise ValueError("x cannot be chunked as it has 0 dimensions.") + n = x.shape[0] + if chunk_size is None: + chunk_size = n + + n_chunks, residual = divmod(n, chunk_size) + if residual != 0: + raise ValueError( + "The first dimension of x must be divisible by chunk_size." + + f"\n Got x.shape={x.shape} but chunk_size={chunk_size}." 
+ ) + return x.reshape((n_chunks, chunk_size) + x.shape[1:]) + + +#### + +# The following section of this code is derived from the NetKet project +# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/ +# netket/jax/_scanmap.py + + +def scan_append(f, x): + """Evaluate f element by element in x while appending the results. + + Parameters + ---------- + f: a function that takes elements of the leading dimension of x + x: a pytree where each leaf array has the same leading dimension + + Returns + ------- + a (pytree of) array(s) with leading dimension same as x, + containing the evaluation of f at each element in x + """ + carry_init = True + + def f_(carry, x): + return False, f(x) + + _, res_append = jax.lax.scan(f_, carry_init, x, unroll=1) + return res_append + + +# TODO in_axes a la vmap? +def _scanmap(fun, scan_fun, argnums=0): + """A helper function to wrap f with a scan_fun.""" + + def f_(*args, **kwargs): + f = lu.wrap_init(fun, kwargs) + f_partial, dyn_args = jax.api_util.argnums_partial( + f, argnums, args, require_static_args_hashable=False + ) + return scan_fun(lambda x: f_partial.call_wrapped(*x), dyn_args) + + return f_ + + +# The following section of this code is derived from the NetKet project +# https://github.com/netket/netket/blob/9881c9fb217a2ac4dc9274a054bf6e6a2993c519/ +# netket/jax/_vmap_chunked.py + + +def _eval_fun_in_chunks(vmapped_fun, chunk_size, argnums, *args, **kwargs): + n_elements = jax.tree_util.tree_leaves(args[argnums[0]])[0].shape[0] + n_chunks, n_rest = divmod(n_elements, chunk_size) + + if n_chunks == 0 or chunk_size >= n_elements: + y = vmapped_fun(*args, **kwargs) + else: + # split inputs + def _get_chunks(x): + x_chunks = jax.tree_util.tree_map( + lambda x_: x_[: n_elements - n_rest, ...], x + ) + x_chunks = _chunk(x_chunks, chunk_size) + return x_chunks + + def _get_rest(x): + x_rest = jax.tree_util.tree_map( + lambda x_: x_[n_elements - n_rest :, ...], x + ) + return x_rest + + args_chunks = [ + _get_chunks(a) if i in argnums else a for i, a in enumerate(args) + ] + args_rest = [_get_rest(a) if i in argnums else a for i, a in enumerate(args)] + + y_chunks = _unchunk( + _scanmap(vmapped_fun, scan_append, argnums)(*args_chunks, **kwargs) + ) + + if n_rest == 0: + y = y_chunks + else: + y_rest = vmapped_fun(*args_rest, **kwargs) + y = jax.tree_util.tree_map( + lambda y1, y2: jnp.concatenate((y1, y2)), y_chunks, y_rest + ) + return y + + +def _chunk_vmapped_function( + vmapped_fun: Callable, + chunk_size: Optional[int], + argnums=0, +) -> Callable: + """Takes a vmapped function and computes it in chunks.""" + if chunk_size is None: + return vmapped_fun + + if isinstance(argnums, int): + argnums = (argnums,) + return functools.partial(_eval_fun_in_chunks, vmapped_fun, chunk_size, argnums) + + +def _parse_in_axes(in_axes): + if isinstance(in_axes, int): + in_axes = (in_axes,) + + if not set(in_axes).issubset((0, None)): + raise NotImplementedError("Only in_axes 0/None are currently supported") + + argnums = tuple( + map(lambda ix: ix[0], filter(lambda ix: ix[1] is not None, enumerate(in_axes))) + ) + return in_axes, argnums + + +def vmap_chunked( + f: Callable, + in_axes=0, + *, + chunk_size: Optional[int], +) -> Callable: + """Behaves like jax.vmap but uses scan to chunk the computations in smaller chunks. + + Parameters + ---------- + f: The function to be vectorised. + in_axes: The axes that should be scanned along. Only supports `0` or `None` + chunk_size: The maximum size of the chunks to be used. 
If it is `None`, + chunking is disabled + + + Returns + ------- + f: A vectorised and chunked function + """ + in_axes, argnums = _parse_in_axes(in_axes) + vmapped_fun = jax.vmap(f, in_axes=in_axes) + return _chunk_vmapped_function(vmapped_fun, chunk_size, argnums) + + +def batched_vectorize(pyfunc, *, excluded=frozenset(), signature=None, chunk_size=None): + """Define a vectorized function with broadcasting and batching. + + :func:`vectorize` is a convenience wrapper for defining vectorized + functions with broadcasting, in the style of NumPy's + `generalized universal functions + `_. + It allows for defining functions that are automatically repeated across + any leading dimensions, without the implementation of the function needing to + be concerned about how to handle higher dimensional inputs. + + :func:`jax.numpy.vectorize` has the same interface as + :class:`numpy.vectorize`, but it is syntactic sugar for an auto-batching + transformation (:func:`vmap`) rather than a Python loop. This should be + considerably more efficient, but the implementation must be written in terms + of functions that act on JAX arrays. + + Parameters + ---------- + pyfunc: callable,function to vectorize. + excluded: optional set of integers representing positional arguments for + which the function will not be vectorized. These will be passed directly + to ``pyfunc`` unmodified. + signature: optional generalized universal function signature, e.g., + ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication. If + provided, ``pyfunc`` will be called with (and expected to return) arrays + with shapes given by the size of corresponding core dimensions. By + default, pyfunc is assumed to take scalars arrays as input and output. + chunk_size: the size of the batches to pass to vmap. If None, defaults to + the largest possible chunk_size (like the default behavior of ``vectorize11) + + Returns + ------- + Batch-vectorized version of the given function. + + """ + if any(not isinstance(exclude, (str, int)) for exclude in excluded): + raise TypeError( + "jax.numpy.vectorize can only exclude integer or string arguments, " + "but excluded={!r}".format(excluded) + ) + if any(isinstance(e, int) and e < 0 for e in excluded): + raise ValueError(f"excluded={excluded!r} contains negative numbers") + + @functools.wraps(pyfunc) + def wrapped(*args, **kwargs): + error_context = ( + "on vectorized function with excluded={!r} and " + "signature={!r}".format(excluded, signature) + ) + excluded_func, args, kwargs = _apply_excluded(pyfunc, excluded, args, kwargs) + + if signature is not None: + input_core_dims, output_core_dims = _parse_gufunc_signature(signature) + else: + input_core_dims = [()] * len(args) + output_core_dims = None + + none_args = {i for i, arg in enumerate(args) if arg is None} + if any(none_args): + if any(input_core_dims[i] != () for i in none_args): + raise ValueError( + f"Cannot pass None at locations {none_args} with {signature=}" + ) + excluded_func, args, _ = _apply_excluded(excluded_func, none_args, args, {}) + input_core_dims = [ + dim for i, dim in enumerate(input_core_dims) if i not in none_args + ] + + args = tuple(map(jnp.asarray, args)) + + broadcast_shape, dim_sizes = _parse_input_dimensions( + args, input_core_dims, error_context + ) + + checked_func = _check_output_dims( + excluded_func, dim_sizes, output_core_dims, error_context + ) + + # Rather than broadcasting all arguments to full broadcast shapes, prefer + # expanding dimensions using vmap. 
By pushing broadcasting + # into vmap, we can make use of more efficient batching rules for + # primitives where only some arguments are batched (e.g., for + # lax_linalg.triangular_solve), and avoid instantiating large broadcasted + # arrays. + + squeezed_args = [] + rev_filled_shapes = [] + + for arg, core_dims in zip(args, input_core_dims): + noncore_shape = arg.shape[: arg.ndim - len(core_dims)] + + pad_ndim = len(broadcast_shape) - len(noncore_shape) + filled_shape = pad_ndim * (1,) + noncore_shape + rev_filled_shapes.append(filled_shape[::-1]) + + squeeze_indices = tuple( + i for i, size in enumerate(noncore_shape) if size == 1 + ) + squeezed_arg = jnp.squeeze(arg, axis=squeeze_indices) + squeezed_args.append(squeezed_arg) + + vectorized_func = checked_func + dims_to_expand = [] + for negdim, axis_sizes in enumerate(zip(*rev_filled_shapes)): + in_axes = tuple(None if size == 1 else 0 for size in axis_sizes) + if all(axis is None for axis in in_axes): + dims_to_expand.append(len(broadcast_shape) - 1 - negdim) + else: + # change the vmap here to chunked_vmap + vectorized_func = vmap_chunked( + vectorized_func, in_axes, chunk_size=chunk_size + ) + result = vectorized_func(*squeezed_args) + + if not dims_to_expand: + return result + elif isinstance(result, tuple): + return tuple(jnp.expand_dims(r, axis=dims_to_expand) for r in result) + else: + return jnp.expand_dims(result, axis=dims_to_expand) + + return wrapped diff --git a/desc/continuation.py b/desc/continuation.py index 8be7e33cc..b5274171f 100644 --- a/desc/continuation.py +++ b/desc/continuation.py @@ -29,6 +29,7 @@ def _solve_axisym( maxiter=100, verbose=1, checkpoint_path=None, + jac_chunk_size="auto", ): """Solve initial axisymmetric case with adaptive step sizing.""" timer = Timer() @@ -99,7 +100,9 @@ def _solve_axisym( surf_i = surf_i2 constraints_i = get_fixed_boundary_constraints(eq=eqi) - objective_i = get_equilibrium_objective(eq=eqi, mode=objective) + objective_i = get_equilibrium_objective( + eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size + ) if verbose: _print_iteration_summary( @@ -196,6 +199,7 @@ def _add_pressure( maxiter=100, verbose=1, checkpoint_path=None, + jac_chunk_size="auto", ): """Add pressure with adaptive step sizing.""" timer = Timer() @@ -224,7 +228,9 @@ def _add_pressure( pres_ratio += pres_step constraints_i = get_fixed_boundary_constraints(eq=eqi) - objective_i = get_equilibrium_objective(eq=eqi, mode=objective) + objective_i = get_equilibrium_objective( + eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size + ) if verbose: _print_iteration_summary( @@ -324,6 +330,7 @@ def _add_shaping( maxiter=100, verbose=1, checkpoint_path=None, + jac_chunk_size="auto", ): """Add 3D shaping with adaptive step sizing.""" timer = Timer() @@ -353,7 +360,9 @@ def _add_shaping( bdry_ratio += bdry_step constraints_i = get_fixed_boundary_constraints(eq=eqi) - objective_i = get_equilibrium_objective(eq=eqi, mode=objective) + objective_i = get_equilibrium_objective( + eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size + ) if verbose: _print_iteration_summary( @@ -451,6 +460,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter=100, verbose=1, checkpoint_path=None, + jac_chunk_size="auto", **kwargs, ): """Solve for an equilibrium using an automatic continuation method. 
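The new ``desc/batching.py`` module above exposes ``vmap_chunked`` and ``batched_vectorize``, which evaluate a vmapped function over the leading axis in fixed-size chunks (via ``lax.scan``) to cap peak memory. A minimal usage sketch of those two helpers as defined in this diff; the import path ``desc.batching`` is assumed from the new file's location, and the toy function and shapes are illustrative:

```python
# Usage sketch of the chunked-vmap helpers added in desc/batching.py.
import jax.numpy as jnp

from desc.batching import batched_vectorize, vmap_chunked


def residual(x):
    # toy per-row computation: one length-50 row in, one scalar out
    return jnp.sum(jnp.sin(x) ** 2)


rows = jnp.ones((1000, 50))

# vmap over the leading axis, but evaluate at most 100 rows at a time,
# trading a little speed for a smaller peak memory footprint
f_chunked = vmap_chunked(residual, in_axes=0, chunk_size=100)
out1 = f_chunked(rows)  # shape (1000,)

# same idea through the gufunc-style wrapper: "(n)->()" maps each row to a
# scalar, with the leading dimension of 1000 batched in chunks of 100
f_vec = batched_vectorize(residual, signature="(n)->()", chunk_size=100)
out2 = f_vec(rows)  # shape (1000,)
```

These helpers are what the new ``jac_chunk_size`` option (threaded through the objectives and the continuation drivers in the rest of this diff) ultimately relies on.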
@@ -529,6 +539,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter, verbose, checkpoint_path, + jac_chunk_size=jac_chunk_size, ) # for zero current we want to do shaping before pressure to avoid having a @@ -547,6 +558,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter, verbose, checkpoint_path, + jac_chunk_size=jac_chunk_size, ) eqfam = _add_pressure( @@ -562,6 +574,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter, verbose, checkpoint_path, + jac_chunk_size=jac_chunk_size, ) # for other cases such as fixed iota or nonzero current we do pressure first @@ -580,6 +593,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter, verbose, checkpoint_path, + jac_chunk_size=jac_chunk_size, ) eqfam = _add_shaping( @@ -595,6 +609,7 @@ def solve_continuation_automatic( # noqa: C901 maxiter, verbose, checkpoint_path, + jac_chunk_size=jac_chunk_size, ) eq.params_dict = eqfam[-1].params_dict eqfam[-1] = eq @@ -625,6 +640,7 @@ def solve_continuation( # noqa: C901 maxiter=100, verbose=1, checkpoint_path=None, + jac_chunk_size="auto", ): """Solve for an equilibrium by continuation method. @@ -685,7 +701,9 @@ def solve_continuation( # noqa: C901 if not isinstance(optimizer, Optimizer): optimizer = Optimizer(optimizer) - objective_i = get_equilibrium_objective(eq=eqfam[0], mode=objective) + objective_i = get_equilibrium_objective( + eq=eqfam[0], mode=objective, jac_chunk_size=jac_chunk_size + ) constraints_i = get_fixed_boundary_constraints(eq=eqfam[0]) ii = 0 @@ -732,7 +750,9 @@ def solve_continuation( # noqa: C901 print("Perturbing equilibrium") # TODO: pass Jx if available eqp = eqfam[ii - 1].copy() - objective_i = get_equilibrium_objective(eq=eqp, mode=objective) + objective_i = get_equilibrium_objective( + eq=eqp, mode=objective, jac_chunk_size=jac_chunk_size + ) constraints_i = get_fixed_boundary_constraints(eq=eqp) eqp.change_resolution(**eqi.resolution) eqp.perturb( @@ -751,8 +771,9 @@ def solve_continuation( # noqa: C901 stop = True if not stop: - # TODO: add ability to rebind objectives - objective_i = get_equilibrium_objective(eq=eqi, mode=objective) + objective_i = get_equilibrium_objective( + eq=eqi, mode=objective, jac_chunk_size=jac_chunk_size + ) constraints_i = get_fixed_boundary_constraints(eq=eqi) eqi.solve( optimizer=optimizer, diff --git a/desc/derivatives.py b/desc/derivatives.py index cf9f6d707..684ea37d4 100644 --- a/desc/derivatives.py +++ b/desc/derivatives.py @@ -5,7 +5,7 @@ import numpy as np from termcolor import colored -from desc.backend import fori_loop, jnp, put, use_jax +from desc.backend import jnp, put, use_jax if use_jax: import jax @@ -315,23 +315,8 @@ def compute_jvp3(cls, fun, argnum1, argnum2, argnum3, v1, v2, v3, *args, **kwarg def _compute_jvp(self, v, *args, **kwargs): return self.compute_jvp(self._fun, self.argnum, v, *args, **kwargs) - def _jac_looped(self, *args, **kwargs): - - n = args[self._argnum].size - shp = jax.eval_shape(self._fun, *args).shape - I = jnp.eye(n) - J = jnp.zeros((*shp, n)).T - - def body(i, J): - tangents = I[i] - Ji = self._compute_jvp(tangents, *args, **kwargs) - J = put(J, i, Ji.T) - return J - - return fori_loop(0, n, body, J).T - def _set_mode(self, mode) -> None: - if mode not in ["fwd", "rev", "grad", "hess", "jvp", "looped"]: + if mode not in ["fwd", "rev", "grad", "hess", "jvp"]: raise ValueError( colored("invalid mode option for automatic differentiation", "red") ) @@ -347,8 +332,6 @@ def _set_mode(self, mode) -> None: self._compute = jax.hessian(self._fun, self._argnum) elif self._mode == "jvp": 
self._compute = self._compute_jvp - elif self._mode == "looped": - self._compute = self._jac_looped class FiniteDiffDerivative(_Derivative): diff --git a/desc/objectives/_bootstrap.py b/desc/objectives/_bootstrap.py index 5887da06f..c76a003b2 100644 --- a/desc/objectives/_bootstrap.py +++ b/desc/objectives/_bootstrap.py @@ -66,6 +66,18 @@ class BootstrapRedlConsistency(_Objective): or quasi-axisymmetry; set to +/-NFP for quasi-helical symmetry. name : str, optional Name of the objective function. + jac_chunk_size : int, optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. + """ _coordinates = "r" @@ -85,6 +97,7 @@ def __init__( grid=None, helicity=(1, 0), name="Bootstrap current self-consistency (Redl)", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -102,6 +115,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_coils.py b/desc/objectives/_coils.py index 04f295491..40c53bd85 100644 --- a/desc/objectives/_coils.py +++ b/desc/objectives/_coils.py @@ -59,6 +59,17 @@ class _CoilObjective(_Objective): If a list, must have the same structure as coil. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -75,6 +86,7 @@ def __init__( deriv_mode="auto", grid=None, name=None, + jac_chunk_size=None, ): self._grid = grid self._data_keys = data_keys @@ -89,6 +101,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): # noqa:C901 @@ -254,6 +267,17 @@ class CoilLength(_CoilObjective): Defaults to ``LinearGrid(N=2 * coil.N + 5)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). 
The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -273,6 +297,7 @@ def __init__( deriv_mode="auto", grid=None, name="coil length", + jac_chunk_size=None, ): if target is None and bounds is None: target = 2 * np.pi @@ -289,6 +314,7 @@ def __init__( deriv_mode=deriv_mode, grid=grid, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -379,6 +405,17 @@ class CoilCurvature(_CoilObjective): Defaults to ``LinearGrid(N=2 * coil.N + 5)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -398,6 +435,7 @@ def __init__( deriv_mode="auto", grid=None, name="coil curvature", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (0, 1) @@ -414,6 +452,7 @@ def __init__( deriv_mode=deriv_mode, grid=grid, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -499,6 +538,17 @@ class CoilTorsion(_CoilObjective): Defaults to ``LinearGrid(N=2 * coil.N + 5)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -518,6 +568,7 @@ def __init__( deriv_mode="auto", grid=None, name="coil torsion", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -534,6 +585,7 @@ def __init__( deriv_mode=deriv_mode, grid=grid, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -619,6 +671,17 @@ class CoilCurrentLength(CoilLength): Defaults to ``LinearGrid(N=2 * coil.N + 5)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. 
The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -638,6 +701,7 @@ def __init__( deriv_mode="auto", grid=None, name="coil current length", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -653,6 +717,7 @@ def __init__( deriv_mode=deriv_mode, grid=grid, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -747,6 +812,17 @@ class CoilSetMinDistance(_Objective): If a list, must have the same structure as coils. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -766,6 +842,7 @@ def __init__( deriv_mode="auto", grid=None, name="coil-coil minimum distance", + jac_chunk_size=None, ): from desc.coils import CoilSet @@ -787,6 +864,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -921,6 +999,17 @@ class PlasmaCoilSetMinDistance(_Objective): False by default, so that self.things = [coil, eq]. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -944,6 +1033,7 @@ def __init__( eq_fixed=False, coils_fixed=False, name="plasma-coil minimum distance", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (1, np.inf) @@ -969,6 +1059,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -1151,6 +1242,17 @@ class QuadraticFlux(_Objective): plasma currents) is set to zero. 
name : str Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -1174,6 +1276,7 @@ def __init__( field_grid=None, vacuum=False, name="Quadratic flux", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -1192,6 +1295,7 @@ def __init__( normalize=normalize, normalize_target=normalize_target, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -1361,6 +1465,17 @@ class ToroidalFlux(_Objective): zeta=jnp.array(0.0), NFP=eq.NFP). name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -1383,6 +1498,7 @@ def __init__( field_grid=None, eval_grid=None, name="toroidal-flux", + jac_chunk_size=None, ): if target is None and bounds is None: target = eq.Psi @@ -1402,6 +1518,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_equilibrium.py b/desc/objectives/_equilibrium.py index dc2f4bbb2..0124a84b1 100644 --- a/desc/objectives/_equilibrium.py +++ b/desc/objectives/_equilibrium.py @@ -63,6 +63,17 @@ class ForceBalance(_Objective): Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. 
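The ``jac_chunk_size`` description above says the Jacobian is computed ``jac_chunk_size`` columns at a time, with memory roughly ``m0 + m1*jac_chunk_size`` and time roughly ``t0 + t1/jac_chunk_size``. The following standalone JAX sketch (not the code added in this diff, just an illustration of the idea) shows what computing a forward-mode Jacobian in column chunks looks like: each basis vector yields one column via a JVP, and only ``chunk_size`` columns are in flight at once:

```python
# Standalone illustration of a chunked forward-mode Jacobian.
import jax
import jax.numpy as jnp


def jac_fwd_chunked(f, x, chunk_size):
    n = x.size
    eye = jnp.eye(n)
    blocks = []
    for start in range(0, n, chunk_size):
        basis = eye[start : start + chunk_size]  # (chunk, n) basis vectors
        # one JVP per basis vector in this chunk, evaluated together with vmap
        cols = jax.vmap(lambda v: jax.jvp(f, (x,), (v,))[1])(basis)  # (chunk, m)
        blocks.append(cols.T)  # (m, chunk) columns of the Jacobian
    return jnp.concatenate(blocks, axis=1)  # (m, n)


f = lambda x: jnp.array([jnp.sum(x**2), jnp.prod(x)])
x = jnp.arange(1.0, 6.0)
assert jnp.allclose(jac_fwd_chunked(f, x, chunk_size=2), jax.jacfwd(f)(x))
```

Smaller chunks mean more sequential JVP batches (slower) but fewer tangents resident in memory at once, which is exactly the tradeoff the docstring describes.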
""" @@ -83,6 +94,7 @@ def __init__( deriv_mode="auto", grid=None, name="force", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -97,6 +109,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -235,6 +248,17 @@ class ForceBalanceAnisotropic(_Objective): Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` name : str Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -255,6 +279,7 @@ def __init__( deriv_mode="auto", grid=None, name="force-anisotropic", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -269,6 +294,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -393,6 +419,17 @@ class RadialForceBalance(_Objective): Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -413,6 +450,7 @@ def __init__( deriv_mode="auto", grid=None, name="radial force", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -427,6 +465,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -551,6 +590,17 @@ class HelicalForceBalance(_Objective): Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). 
The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -571,6 +621,7 @@ def __init__( deriv_mode="auto", grid=None, name="helical force", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -585,6 +636,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -707,6 +759,17 @@ class Energy(_Objective): Adiabatic (compressional) index. Default = 0. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -730,6 +793,7 @@ def __init__( grid=None, gamma=0, name="energy", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -745,6 +809,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -874,6 +939,17 @@ class CurrentDensity(_Objective): Defaults to ``ConcentricGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -894,6 +970,7 @@ def __init__( deriv_mode="auto", grid=None, name="current density", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -908,6 +985,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_free_boundary.py b/desc/objectives/_free_boundary.py index 62d5984ae..cd75899f4 100644 --- a/desc/objectives/_free_boundary.py +++ b/desc/objectives/_free_boundary.py @@ -71,8 +71,19 @@ class VacuumBoundaryError(_Objective): field_fixed : bool Whether to assume the field is fixed. For free boundary solve, should be fixed. 
For single stage optimization, should be False (default). - name : str + name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -97,6 +108,7 @@ def __init__( field_grid=None, field_fixed=False, name="Vacuum boundary error", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -119,6 +131,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -408,8 +421,19 @@ class BoundaryError(_Objective): loop : bool If True, evaluate integral using loops, as opposed to vmap. Slower, but uses less memory. - name : str + name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. Examples @@ -455,6 +479,7 @@ def __init__( field_fixed=False, loop=True, name="Boundary error", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -481,6 +506,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -865,8 +891,19 @@ class BoundaryErrorNESTOR(_Objective): "auto" selects forward or reverse mode based on the size of the input and output of the objective. Has no effect on self.grad or self.hess which always use reverse mode and forward over reverse mode respectively. - name : str + name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. 
+ If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -893,6 +930,7 @@ def __init__( loss_function=None, deriv_mode="auto", name="NESTOR Boundary", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -912,6 +950,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_generic.py b/desc/objectives/_generic.py index 46e45a995..b2e568fd2 100644 --- a/desc/objectives/_generic.py +++ b/desc/objectives/_generic.py @@ -57,6 +57,17 @@ class GenericObjective(_Objective): ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -75,6 +86,7 @@ def __init__( deriv_mode="auto", grid=None, name="generic", + jac_chunk_size=None, **kwargs, ): errorif( @@ -97,6 +109,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) self._p = _parse_parameterization(thing) self._scalar = not bool(data_index[self._p][self.f]["dim"]) @@ -225,6 +238,7 @@ def __init__( normalize=False, normalize_target=False, name="custom linear", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -237,6 +251,7 @@ def __init__( normalize=normalize, normalize_target=normalize_target, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=False, verbose=1): @@ -342,6 +357,17 @@ class ObjectiveFromUser(_Objective): ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid)`` if thing is an Equilibrium. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. 
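Per the changelog entry at the top of this diff, the ``ObjectiveFunction``-level ``jac_chunk_size`` applies when ``deriv_mode="batched"``, while the per-objective values apply when ``deriv_mode="blocked"``. A hedged sketch of both combinations; the example equilibrium and the particular chunk sizes are illustrative only:

```python
# Usage sketch of jac_chunk_size with ObjectiveFunction deriv modes.
from desc.examples import get
from desc.objectives import ForceBalance, ObjectiveFunction, QuasisymmetryTripleProduct

eq = get("HELIOTRON")

# deriv_mode="batched": a single Jacobian for the whole ObjectiveFunction,
# chunked according to the ObjectiveFunction's own jac_chunk_size ("auto"
# picks a size from the available-memory heuristic quoted in the changelog)
obj_batched = ObjectiveFunction(
    ForceBalance(eq=eq), deriv_mode="batched", jac_chunk_size="auto"
)

# deriv_mode="blocked": each sub-objective's Jacobian is computed separately,
# so the per-objective jac_chunk_size values are the ones that take effect
obj_blocked = ObjectiveFunction(
    (
        ForceBalance(eq=eq, jac_chunk_size=100),
        QuasisymmetryTripleProduct(eq=eq, jac_chunk_size=500),
    ),
    deriv_mode="blocked",
)
```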
Examples -------- @@ -378,6 +404,7 @@ def __init__( deriv_mode="auto", grid=None, name="custom", + jac_chunk_size=None, **kwargs, ): errorif( @@ -400,6 +427,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) self._p = _parse_parameterization(thing) diff --git a/desc/objectives/_geometry.py b/desc/objectives/_geometry.py index e405609c7..6c8c1376d 100644 --- a/desc/objectives/_geometry.py +++ b/desc/objectives/_geometry.py @@ -53,6 +53,17 @@ class AspectRatio(_Objective): or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -72,6 +83,7 @@ def __init__( deriv_mode="auto", grid=None, name="aspect ratio", + jac_chunk_size=None, ): if target is None and bounds is None: target = 2 @@ -86,6 +98,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -215,6 +228,17 @@ class Elongation(_Objective): or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -234,6 +258,7 @@ def __init__( deriv_mode="auto", grid=None, name="elongation", + jac_chunk_size=None, ): if target is None and bounds is None: target = 1 @@ -248,6 +273,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -376,6 +402,17 @@ class Volume(_Objective): or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). 
The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -395,6 +432,7 @@ def __init__( deriv_mode="auto", grid=None, name="volume", + jac_chunk_size=None, ): if target is None and bounds is None: target = 1 @@ -409,6 +447,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -586,6 +625,18 @@ class PlasmaVesselDistance(_Objective): more accurate approximation of the true min. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. + """ _coordinates = "rtz" @@ -611,6 +662,7 @@ def __init__( softmin_alpha=1.0, name="plasma-vessel distance", use_signed_distance=False, + jac_chunk_size=None, **kwargs, ): if target is None and bounds is None: @@ -650,6 +702,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -924,6 +977,17 @@ class MeanCurvature(_Objective): or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -943,6 +1007,7 @@ def __init__( deriv_mode="auto", grid=None, name="mean curvature", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (-np.inf, 0) @@ -957,6 +1022,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -1084,6 +1150,17 @@ class PrincipalCurvature(_Objective): or ``LinearGrid(M=2*eq.M, N=2*eq.N)`` for ``FourierRZToroidalSurface``. name : str, optional Name of the objective function. 
+ jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -1103,6 +1180,7 @@ def __init__( deriv_mode="auto", grid=None, name="principal-curvature", + jac_chunk_size=None, ): if target is None and bounds is None: target = 1 @@ -1117,6 +1195,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -1239,6 +1318,17 @@ class BScaleLength(_Objective): ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -1258,6 +1348,7 @@ def __init__( deriv_mode="auto", grid=None, name="B-scale-length", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (1, np.inf) @@ -1272,6 +1363,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -1390,6 +1482,17 @@ class GoodCoordinates(_Objective): Collocation grid containing the nodes to evaluate at. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. 
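For a feel of what ``jac_chunk_size="auto"`` resolves to, the snippet below just plugs made-up numbers into the heuristic quoted in the changelog entry of this diff. The values of ``dim_x``, ``dim_f``, and ``avail_mem`` are hypothetical, and ``avail_mem`` is assumed to be in the same units as DESC's ``desc_config["avail_mem"]``:

```python
# Numeric illustration of the jac_chunk_size="auto" heuristic from the changelog.
dim_x, dim_f = 2000, 5000  # hypothetical Jacobian shape (dim_f x dim_x)
avail_mem = 4.0            # hypothetical available device memory

estimated_memory_usage = 2.4e-7 * dim_f * dim_x + 1  # = 3.4
max_jac_chunk_size = (avail_mem / estimated_memory_usage - 0.22) / 0.85 * dim_x
print(int(max_jac_chunk_size))  # ~2250; presumably clipped to dim_x = 2000,
                                # i.e. no chunking needed for this case
```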
""" @@ -1410,6 +1513,7 @@ def __init__( deriv_mode="auto", grid=None, name="coordinate goodness", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -1425,6 +1529,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_omnigenity.py b/desc/objectives/_omnigenity.py index 1eb7a8da6..415716c2e 100644 --- a/desc/objectives/_omnigenity.py +++ b/desc/objectives/_omnigenity.py @@ -57,6 +57,17 @@ class QuasisymmetryBoozer(_Objective): Toroidal resolution of Boozer transformation. Default = 2 * eq.N. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -78,6 +89,7 @@ def __init__( M_booz=None, N_booz=None, name="QS Boozer", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -95,6 +107,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) self._print_value_fmt = "Quasi-symmetry ({},{}) Boozer error: ".format( @@ -269,6 +282,17 @@ class QuasisymmetryTwoTerm(_Objective): Type of quasi-symmetry (M, N). name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -289,6 +313,7 @@ def __init__( grid=None, helicity=(1, 0), name="QS two-term", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -304,6 +329,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) self._print_value_fmt = "Quasi-symmetry ({},{}) two-term error: ".format( @@ -454,6 +480,17 @@ class QuasisymmetryTripleProduct(_Objective): Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. 
The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -473,6 +510,7 @@ def __init__( deriv_mode="auto", grid=None, name="QS triple product", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -487,6 +525,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -630,8 +669,19 @@ class Omnigenity(_Objective): computation time during optimization and self.things = [eq] only. If False, the field is allowed to change during the optimization and its associated data are re-computed at every iteration (Default). - name : str + name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -658,6 +708,7 @@ def __init__( eq_fixed=False, field_fixed=False, name="omnigenity", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -689,6 +740,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -967,6 +1019,17 @@ class Isodynamicity(_Objective): Defaults to ``LinearGrid(M=eq.M_grid, N=eq.N_grid)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. 
""" @@ -986,6 +1049,7 @@ def __init__( deriv_mode="auto", grid=None, name="Isodynamicity", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -1000,6 +1064,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_power_balance.py b/desc/objectives/_power_balance.py index 299b24835..313aa8734 100644 --- a/desc/objectives/_power_balance.py +++ b/desc/objectives/_power_balance.py @@ -56,6 +56,17 @@ class FusionPower(_Objective): Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -76,6 +87,7 @@ def __init__( fuel="DT", grid=None, name="fusion power", + jac_chunk_size=None, ): errorif( fuel not in ["DT"], ValueError, f"fuel must be one of ['DT'], got {fuel}." @@ -94,6 +106,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -241,6 +254,17 @@ class HeatingPowerISS04(_Objective): Defaults to ``QuadratureGrid(eq.L_grid, eq.M_grid, eq.N_grid, eq.NFP)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -262,6 +286,7 @@ def __init__( gamma=0, grid=None, name="heating power", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -278,6 +303,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_profiles.py b/desc/objectives/_profiles.py index d53ee4f08..1103421d4 100644 --- a/desc/objectives/_profiles.py +++ b/desc/objectives/_profiles.py @@ -53,6 +53,17 @@ class Pressure(_Objective): Defaults to ``LinearGrid(L=eq.L_grid)``. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. 
The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -72,6 +83,7 @@ def __init__( deriv_mode="auto", grid=None, name="pressure", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -86,6 +98,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -211,6 +224,17 @@ class RotationalTransform(_Objective): are required. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -230,6 +254,7 @@ def __init__( deriv_mode="auto", grid=None, name="rotational transform", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -244,6 +269,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -382,6 +408,17 @@ class Shear(_Objective): are required. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -401,6 +438,7 @@ def __init__( deriv_mode="auto", grid=None, name="shear", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (-np.inf, 0) @@ -415,6 +453,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -549,6 +588,17 @@ class ToroidalCurrent(_Objective): are required. name : str, optional Name of the objective function. 
+ jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -568,6 +618,7 @@ def __init__( deriv_mode="auto", grid=None, name="toroidal current", + jac_chunk_size=None, ): if target is None and bounds is None: target = 0 @@ -582,6 +633,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/_stability.py b/desc/objectives/_stability.py index 29b88eb7f..40e0abea7 100644 --- a/desc/objectives/_stability.py +++ b/desc/objectives/_stability.py @@ -64,6 +64,17 @@ class MercierStability(_Objective): are required. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. """ @@ -83,6 +94,7 @@ def __init__( deriv_mode="auto", grid=None, name="Mercier Stability", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (0, np.inf) @@ -97,6 +109,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): @@ -245,6 +258,17 @@ class MagneticWell(_Objective): are required. name : str, optional Name of the objective function. + jac_chunk_size : int , optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest size i.e ``obj.dim_x``. 
""" @@ -264,6 +288,7 @@ def __init__( deriv_mode="auto", grid=None, name="Magnetic Well", + jac_chunk_size=None, ): if target is None and bounds is None: bounds = (0, np.inf) @@ -278,6 +303,7 @@ def __init__( loss_function=loss_function, deriv_mode=deriv_mode, name=name, + jac_chunk_size=jac_chunk_size, ) def build(self, use_jit=True, verbose=1): diff --git a/desc/objectives/getters.py b/desc/objectives/getters.py index 92538311f..727c6f183 100644 --- a/desc/objectives/getters.py +++ b/desc/objectives/getters.py @@ -44,7 +44,7 @@ } -def get_equilibrium_objective(eq, mode="force", normalize=True): +def get_equilibrium_objective(eq, mode="force", normalize=True, jac_chunk_size="auto"): """Get the objective function for a typical force balance equilibrium problem. Parameters @@ -57,6 +57,19 @@ def get_equilibrium_objective(eq, mode="force", normalize=True): for minimizing MHD energy. normalize : bool Whether to normalize units of objective. + jac_chunk_size : int or "auto", optional + If `"batched"` deriv_mode is used, will calculate the Jacobian + ``jac_chunk_size`` columns at a time, instead of all at once. + The memory usage of the Jacobian calculation is roughly + ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, + the less memory the Jacobian calculation will require (with some baseline + memory usage). The time it takes to compute the Jacobian is roughly + ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster + the calculation takes, at the cost of requiring more memory. + If None, it will use the largest size i.e ``obj.dim_x``. + Defaults to ``chunk_size="auto"`` which will use a conservative + chunk size based off of a heuristic estimate of the memory usage. + Returns ------- @@ -73,7 +86,7 @@ def get_equilibrium_objective(eq, mode="force", normalize=True): objectives = (RadialForceBalance(**kwargs), HelicalForceBalance(**kwargs)) else: raise ValueError("got an unknown equilibrium objective type '{}'".format(mode)) - return ObjectiveFunction(objectives) + return ObjectiveFunction(objectives, jac_chunk_size=jac_chunk_size) def get_fixed_axis_constraints(eq, profiles=True, normalize=True): diff --git a/desc/objectives/objective_funs.py b/desc/objectives/objective_funs.py index ffacaf639..9a540d0b5 100644 --- a/desc/objectives/objective_funs.py +++ b/desc/objectives/objective_funs.py @@ -1,11 +1,22 @@ """Base classes for objectives.""" import functools +import warnings from abc import ABC, abstractmethod import numpy as np - -from desc.backend import execute_on_cpu, jit, jnp, tree_flatten, tree_unflatten, use_jax +from termcolor import colored + +from desc.backend import ( + desc_config, + execute_on_cpu, + jit, + jnp, + tree_flatten, + tree_unflatten, + use_jax, +) +from desc.batching import batched_vectorize from desc.derivatives import Derivative from desc.io import IOAble from desc.optimizable import Optimizable @@ -15,6 +26,7 @@ errorif, flatten_list, is_broadcastable, + isposint, setdefault, unique_list, ) @@ -29,25 +41,46 @@ class ObjectiveFunction(IOAble): List of objectives to be minimized. use_jit : bool, optional Whether to just-in-time compile the objectives and derivatives. - deriv_mode : {"auto", "batched", "blocked", "looped"} + deriv_mode : {"auto", "batched", "blocked"} Method for computing Jacobian matrices. "batched" uses forward mode, applied to the entire objective at once, and is generally the fastest for vector valued - objectives, though most memory intensive. 
"blocked" builds the Jacobian for each - objective separately, using each objective's preferred AD mode. Generally the - most efficient option when mixing scalar and vector valued objectives. - "looped" uses forward mode jacobian vector products in a loop to build the - Jacobian column by column. Generally the slowest, but most memory efficient. + objectives. Its memory intensity vs. speed may be traded off through the + ``jac_chunk_size`` keyword argument. "blocked" builds the Jacobian for + each objective separately, using each objective's preferred AD mode (and + each objective's `jac_chunk_size`). Generally the most efficient option when + mixing scalar and vector valued objectives. "auto" defaults to "batched" if all sub-objectives are set to "fwd", otherwise "blocked". name : str Name of the objective function. + jac_chunk_size : int or "auto", optional + If `"batched"` deriv_mode is used, will calculate the Jacobian + ``jac_chunk_size`` columns at a time, instead of all at once. + The memory usage of the Jacobian calculation is roughly + ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, + the less memory the Jacobian calculation will require (with some baseline + memory usage). The time it takes to compute the Jacobian is roughly + ``t= t0 + t1/jac_chunk_size` so the larger the ``jac_chunk_size``, the faster + the calculation takes, at the cost of requiring more memory. + If None, it will use the largest size i.e ``obj.dim_x``. + Defaults to ``chunk_size="auto"`` which will use a conservative + chunk size based off of a heuristic estimate of the memory usage. + NOTE: When running on a CPU (not a GPU) on a HPC cluster, DESC is unable to + accurately estimate the available device memory, so the "auto" chunk_size + option will yield a larger chunk size than may be needed. It is recommended + to manually choose a chunk_size if an OOM error is experienced in this case. 
""" _io_attrs_ = ["_objectives"] def __init__( - self, objectives, use_jit=True, deriv_mode="auto", name="ObjectiveFunction" + self, + objectives, + use_jit=True, + deriv_mode="auto", + name="ObjectiveFunction", + jac_chunk_size="auto", ): if not isinstance(objectives, (tuple, list)): objectives = (objectives,) @@ -55,8 +88,22 @@ def __init__( isinstance(obj, _Objective) for obj in objectives ), "members of ObjectiveFunction should be instances of _Objective" assert use_jit in {True, False} - assert deriv_mode in {"auto", "batched", "looped", "blocked"} + if deriv_mode == "looped": + # overwrite the user inputs if deprecated "looped" was given + deriv_mode = "batched" + jac_chunk_size = 1 + warnings.warn( + colored( + '``deriv_mode="looped"`` is deprecated in favor of' + ' ``deriv_mode="batched"`` with ``jac_chunk_size=1``.', + "yellow", + ), + DeprecationWarning, + ) + assert deriv_mode in {"auto", "batched", "blocked"} + assert jac_chunk_size in ["auto", None] or isposint(jac_chunk_size) + self._jac_chunk_size = jac_chunk_size self._objectives = objectives self._use_jit = use_jit self._deriv_mode = deriv_mode @@ -130,12 +177,46 @@ def build(self, use_jit=None, verbose=1): self._scalar = False self._set_derivatives() + sub_obj_jac_chunk_sizes_are_ints = [ + isposint(obj._jac_chunk_size) for obj in self.objectives + ] + errorif( + any(sub_obj_jac_chunk_sizes_are_ints) and self._deriv_mode != "blocked", + ValueError, + "'jac_chunk_size' was passed into one or more sub-objectives, but the" + " ObjectiveFunction is using 'batched' deriv_mode, so sub-objective " + "'jac_chunk_size' will be ignored in favor of the ObjectiveFunction's " + f"'jac_chunk_size' of {self._jac_chunk_size}." + " Specify 'blocked' deriv_mode if each sub-objective is desired to have a " + "different 'jac_chunk_size' for its Jacobian computation.", + ) + errorif( + self._jac_chunk_size not in ["auto", None] + and self._deriv_mode == "blocked", + ValueError, + "'jac_chunk_size' was passed into ObjectiveFunction, but the " + "ObjectiveFunction is using 'blocked' deriv_mode, so sub-objective " + "'jac_chunk_size' are used to compute each sub-objective's Jacobian, " + "`ignoring the ObjectiveFunction's 'jac_chunk_size'.", + ) + if not self.use_jit: self._unjit() self._set_things() - self._built = True + + if self._jac_chunk_size == "auto": + # Heuristic estimates of fwd mode Jacobian memory usage, + # slightly conservative, based on using ForceBalance as the objective + estimated_memory_usage = 2.4e-7 * self.dim_f * self.dim_x + 1 # in GB + max_chunk_size = round( + (desc_config.get("avail_mem") / estimated_memory_usage - 0.22) + / 0.85 + * self.dim_x + ) + self._jac_chunk_size = max([1, max_chunk_size]) + timer.stop("Objective build") if verbose > 1: timer.disp("Objective build") @@ -459,8 +540,6 @@ def jac_scaled(self, x, constants=None): if self._deriv_mode == "batched": J = Derivative(self.compute_scaled, mode="fwd")(x, constants) - if self._deriv_mode == "looped": - J = Derivative(self.compute_scaled, mode="looped")(x, constants) if self._deriv_mode == "blocked": J = self._jac_blocked("jac_scaled", x, constants) @@ -474,8 +553,6 @@ def jac_scaled_error(self, x, constants=None): if self._deriv_mode == "batched": J = Derivative(self.compute_scaled_error, mode="fwd")(x, constants) - if self._deriv_mode == "looped": - J = Derivative(self.compute_scaled_error, mode="looped")(x, constants) if self._deriv_mode == "blocked": J = self._jac_blocked("jac_scaled_error", x, constants) @@ -489,8 +566,6 @@ def jac_unscaled(self, x, 
constants=None): if self._deriv_mode == "batched": J = Derivative(self.compute_unscaled, mode="fwd")(x, constants) - if self._deriv_mode == "looped": - J = Derivative(self.compute_unscaled, mode="looped")(x, constants) if self._deriv_mode == "blocked": J = self._jac_blocked("jac_unscaled", x, constants) @@ -502,15 +577,23 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"): fun = lambda x: getattr(self, op)(x, constants) if len(v) == 1: jvpfun = lambda dx: Derivative.compute_jvp(fun, 0, dx, x) - return jnp.vectorize(jvpfun, signature="(n)->(k)")(v[0]) + return batched_vectorize( + jvpfun, signature="(n)->(k)", chunk_size=self._jac_chunk_size + )(v[0]) elif len(v) == 2: jvpfun = lambda dx1, dx2: Derivative.compute_jvp2(fun, 0, 0, dx1, dx2, x) - return jnp.vectorize(jvpfun, signature="(n),(n)->(k)")(v[0], v[1]) + return batched_vectorize( + jvpfun, signature="(n),(n)->(k)", chunk_size=self._jac_chunk_size + )(v[0], v[1]) elif len(v) == 3: jvpfun = lambda dx1, dx2, dx3: Derivative.compute_jvp3( fun, 0, 0, 0, dx1, dx2, dx3, x ) - return jnp.vectorize(jvpfun, signature="(n),(n),(n)->(k)")(v[0], v[1], v[2]) + return batched_vectorize( + jvpfun, + signature="(n),(n),(n)->(k)", + chunk_size=self._jac_chunk_size, + )(v[0], v[1], v[2]) else: raise NotImplementedError("Cannot compute JVP higher than 3rd order.") @@ -817,6 +900,17 @@ class _Objective(IOAble, ABC): reverse mode and forward over reverse mode respectively. name : str, optional Name of the objective. + jac_chunk_size : int, optional + Will calculate the Jacobian + ``jac_chunk_size`` columns at a time, instead of all at once. + The memory usage of the Jacobian calculation is roughly + ``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, + the less memory the Jacobian calculation will require (with some baseline + memory usage). The time it takes to compute the Jacobian is roughly + ``t = t0 + t1/jac_chunk_size``, so the larger the ``jac_chunk_size``, the less + time the calculation takes, at the cost of requiring more memory. + If None, it will use the largest size, i.e. ``obj.dim_x``. + Defaults to ``jac_chunk_size=None``.
""" @@ -847,6 +941,7 @@ def __init__( loss_function=None, deriv_mode="auto", name=None, + jac_chunk_size=None, ): if self._scalar: assert self._coordinates == "" @@ -857,6 +952,9 @@ def __init__( assert (bounds is None) or (target is None), "Cannot use both bounds and target" assert loss_function in [None, "mean", "min", "max"] assert deriv_mode in {"auto", "fwd", "rev"} + assert jac_chunk_size is None or isposint(jac_chunk_size) + + self._jac_chunk_size = jac_chunk_size self._target = target self._bounds = bounds @@ -1097,7 +1195,9 @@ def _jvp(self, v, x, constants=None, op="compute_scaled"): fun = lambda *x: getattr(self, op)(*x, constants=constants) jvpfun = lambda *dx: Derivative.compute_jvp(fun, tuple(range(len(x))), dx, *x) sig = ",".join(f"(n{i})" for i in range(len(x))) + "->(k)" - return jnp.vectorize(jvpfun, signature=sig)(*v) + return batched_vectorize( + jvpfun, signature=sig, chunk_size=self._jac_chunk_size + )(*v) @jit def jvp_scaled(self, v, x, constants=None): diff --git a/desc/optimize/_constraint_wrappers.py b/desc/optimize/_constraint_wrappers.py index 2ee664574..604609fc8 100644 --- a/desc/optimize/_constraint_wrappers.py +++ b/desc/optimize/_constraint_wrappers.py @@ -5,6 +5,7 @@ import numpy as np from desc.backend import jit, jnp +from desc.batching import batched_vectorize from desc.objectives import ( BoundaryRSelfConsistency, BoundaryZSelfConsistency, @@ -978,7 +979,9 @@ def jvp_scaled(self, v, x, constants=None): constants = setdefault(constants, self.constants) xg, xf = self._update_equilibrium(x, store=True) jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="scaled") - return jnp.vectorize(jvpfun, signature="(n)->(k)")(v) + return batched_vectorize( + jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size + )(v) def jvp_scaled_error(self, v, x, constants=None): """Compute Jacobian-vector product of self.compute_scaled_error. @@ -998,7 +1001,9 @@ def jvp_scaled_error(self, v, x, constants=None): constants = setdefault(constants, self.constants) xg, xf = self._update_equilibrium(x, store=True) jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="scaled_error") - return jnp.vectorize(jvpfun, signature="(n)->(k)")(v) + return batched_vectorize( + jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size + )(v) def jvp_unscaled(self, v, x, constants=None): """Compute Jacobian-vector product of self.compute_unscaled. 
@@ -1018,7 +1023,9 @@ def jvp_unscaled(self, v, x, constants=None): constants = setdefault(constants, self.constants) xg, xf = self._update_equilibrium(x, store=True) jvpfun = lambda u: self._jvp(u, xf, xg, constants, op="unscaled") - return jnp.vectorize(jvpfun, signature="(n)->(k)")(v) + return batched_vectorize( + jvpfun, signature="(n)->(k)", chunk_size=self._objective._jac_chunk_size + )(v) def _jvp(self, v, xf, xg, constants, op): # we're replacing stuff like this with jvps @@ -1059,7 +1066,7 @@ def _jvp(self, v, xf, xg, constants, op): ] ) tangent = self._unfixed_idx_mat @ dfdc - dxdcv - if self._objective._deriv_mode in ["batched", "looped"]: + if self._objective._deriv_mode in ["batched"]: out = getattr(self._objective, "jvp_" + op)(tangent, xg, constants[0]) else: # deriv_mode == "blocked" vgs = jnp.split(tangent, np.cumsum(self._dimx_per_thing)) diff --git a/desc/utils.py b/desc/utils.py index 72dd10f97..203e29d3d 100644 --- a/desc/utils.py +++ b/desc/utils.py @@ -1,8 +1,8 @@ """Utility functions, independent of the rest of DESC.""" +import functools import operator import warnings -from functools import partial from itertools import combinations_with_replacement, permutations import numpy as np @@ -692,7 +692,9 @@ def broadcast_tree(tree_in, tree_out, dtype=int): raise ValueError("trees must be nested lists of dicts") -@partial(jnp.vectorize, signature="(m),(m)->(n)", excluded={"size", "fill_value"}) +@functools.partial( + jnp.vectorize, signature="(m),(m)->(n)", excluded={"size", "fill_value"} +) def take_mask(a, mask, /, *, size=None, fill_value=None): """JIT compilable method to return ``a[mask][:size]`` padded by ``fill_value``. diff --git a/docs/adding_objectives.rst b/docs/adding_objectives.rst index 346ba0db3..31192c991 100644 --- a/docs/adding_objectives.rst +++ b/docs/adding_objectives.rst @@ -70,6 +70,17 @@ A full example objective with comments describing the key points is given below: Collocation grid containing the nodes to evaluate at. name : str, optional Name of the objective function. + jac_chunk_size : int or "auto", optional + Will calculate the Jacobian for this objective ``jac_chunk_size`` + columns at a time, instead of all at once. The memory usage of the + Jacobian calculation is roughly ``memory usage = m0 + m1*jac_chunk_size``: + the smaller the chunk size, the less memory the Jacobian calculation + will require (with some baseline memory usage). The time to compute the + Jacobian is roughly ``t=t0 +t1/jac_chunk_size``, so the larger the + ``jac_chunk_size``, the faster the calculation takes, at the cost of + requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least + memory intensive, but slowest method of calculating the Jacobian. + If None, it will use the largest possible size. """ @@ -88,6 +99,7 @@ A full example objective with comments describing the key points is given below: normalize_target=True, grid=None, name="QS triple product", + jac_chunk_size=None, ): # we don't have to do much here, mostly just call ``super().__init__()`` if target is None and bounds is None: @@ -101,6 +113,7 @@ A full example objective with comments describing the key points is given below: normalize=normalize, normalize_target=normalize_target, name=name, + jac_chunk_size=jac_chunk_size ) def build(self, use_jit=True, verbose=1): @@ -225,7 +238,7 @@ when instantiating an Objective objective to modify the objective cost in order the objective to your desired purpose. 
For example, the DESC ``RotationalTransform`` objective with ``target=iota_target`` by default forms the residual by taking the target and subtracting it from the profile at the points in the grid, resulting in a residual -of the form :math:`\iota_{err} = \sum_{i} (\iota_i - iota_target)^2`, i.e. the residual +of the form :math:`\iota_{err} = \sum_{i} (\iota_i - iota_{target})^2`, i.e. the residual is the sum of squared pointwise error between the current rotational transform profile and the target passed into the objective. If the desired objective instead is to optimize to target an average rotational transform of `iota_target`, we can adapt the diff --git a/docs/index.rst index 3dcc62ccc..d0ff30cfd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,6 +33,7 @@ notebooks/tutorials/bootstrap_current.ipynb notebooks/tutorials/coil_stage_two_optimization.ipynb notebooks/tutorials/ideal_ballooning_stability.ipynb + memory_usage .. toctree:: :maxdepth: 1 diff --git a/docs/memory_usage.rst new file mode 100644 index 000000000..692f2c7ae --- /dev/null +++ b/docs/memory_usage.rst @@ -0,0 +1,27 @@ + +Reducing Memory Size of Objective Jacobian Calculation +------------------------------------------------------ + +During optimization, one of the most memory-intensive steps is the calculation of the Jacobian +of the cost function. This memory cost comes from attempting to calculate the entire Jacobian +matrix in one vectorized operation. However, this can be tuned with the ``jac_chunk_size`` keyword +argument, trading off between high memory usage but quick (the default) and low memory usage but slower. +This matters most when creating the overall ``ObjectiveFunction`` to be used in the optimization (where ``deriv_mode="batched"`` by default). The Jacobian is a +matrix of shape [``obj.dim_f`` x ``obj.dim_x``], and the calculation of the Jacobian is vectorized over +the columns (the ``obj.dim_x`` dimension), where ``obj`` is the ``ObjectiveFunction`` object. Passing in the ``jac_chunk_size`` keyword argument splits +the vectorized computation into chunks of ``jac_chunk_size`` columns at a time, computing the Jacobian +in a slightly slower, but more memory-efficient manner. The memory usage of the Jacobian calculation is +``memory usage = m0 + m1*jac_chunk_size``: the smaller the chunk size, the less memory the Jacobian calculation +will require (with some baseline memory usage). The time to compute the Jacobian is roughly ``t = t0 + t1/jac_chunk_size`` +with some baseline time, so the larger the ``jac_chunk_size``, the less time the calculation takes, +at the cost of requiring more memory. A ``jac_chunk_size`` of 1 corresponds to the least memory intensive, +but slowest method of calculating the Jacobian. If ``jac_chunk_size="auto"``, it will default to a size +that should make the calculation fit in memory based on a heuristic estimate of the Jacobian memory usage. + +If ``deriv_mode="blocked"`` is specified when the ``ObjectiveFunction`` is created, then the Jacobian will +be calculated individually for each of the sub-objectives inside of the ``ObjectiveFunction``, and in that case +the ``jac_chunk_size`` of the individual ``_Objective`` objects inside of the ``ObjectiveFunction`` will be used.
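+
+As a concrete sketch, the example discussed in the next paragraph corresponds roughly to the
+following setup (``eq`` is assumed to be an existing ``Equilibrium``, and the particular
+objectives and chunk sizes are illustrative only)::
+
+    from desc.objectives import MeanCurvature, ObjectiveFunction, QuasisymmetryTripleProduct
+
+    obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)
+    obj2 = MeanCurvature(eq, jac_chunk_size=2000)
+    obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")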
+For example, if ``obj1 = QuasisymmetryTripleProduct(eq, jac_chunk_size=100)``, ``obj2 = MeanCurvature(eq, jac_chunk_size=2000)`` +and ``obj = ObjectiveFunction((obj1, obj2), deriv_mode="blocked")``, then the Jacobian will be calculated with a +``jac_chunk_size=100`` for the quasisymmetry part and a ``jac_chunk_size=2000`` for the curvature part, then the full Jacobian +will be formed as a blocked matrix with the individual Jacobians of these two objectives. diff --git a/tests/test_derivatives.py b/tests/test_derivatives.py index 8cb4ed9bf..29c2f909d 100644 --- a/tests/test_derivatives.py +++ b/tests/test_derivatives.py @@ -121,25 +121,6 @@ def fun(x): jac = AutoDiffDerivative(fun, num_blocks=3, shape=A.shape) np.testing.assert_allclose(jac(x), A) - @pytest.mark.unit - def test_jac_looped(self): - """Test computing the jacobian by explicit looping jvp.""" - - def test_fun(x, y, a): - return jnp.cos(x) + x * y + a - - x = np.array([1, 5, 0.01, 200]) - y = np.array([60, 1, 100, 0.02]) - a = -2.0 - - jac1 = AutoDiffDerivative(test_fun, argnum=0, mode="fwd") - J1 = jac1.compute(x, y, a) - - jac2 = AutoDiffDerivative(test_fun, argnum=0, mode="looped") - J2 = jac2.compute(x, y, a) - - np.testing.assert_allclose(J1, J2, atol=1e-8) - class TestJVP: """Test calculation of jacobian vector products.""" diff --git a/tests/test_examples.py b/tests/test_examples.py index 92f707346..53fb6479b 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -302,9 +302,7 @@ def test_ATF_results(tmpdir_factory): spectral_indexing=eq0.spectral_indexing, ) eqf = EquilibriaFamily.solve_continuation_automatic( - eq, - verbose=2, - checkpoint_path=output_dir.join("ATF.h5"), + eq, verbose=2, checkpoint_path=output_dir.join("ATF.h5"), jac_chunk_size=500 ) eqf = load(output_dir.join("ATF.h5")) rho_err, theta_err = area_difference_desc(eq0, eqf[-1]) @@ -1043,7 +1041,8 @@ def test_freeb_vacuum(): FixPsi(eq=eq), ) objective = ObjectiveFunction( - VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True) + VacuumBoundaryError(eq=eq, field=ext_field, field_fixed=True), + jac_chunk_size=1000, ) eq, _ = eq.optimize( objective, diff --git a/tests/test_objective_funs.py b/tests/test_objective_funs.py index 9a8e1df7d..68de417ea 100644 --- a/tests/test_objective_funs.py +++ b/tests/test_objective_funs.py @@ -1285,31 +1285,62 @@ def test_derivative_modes(): surf = FourierRZToroidalSurface() obj1 = ObjectiveFunction( [ - PlasmaVesselDistance(eq, surf), + PlasmaVesselDistance(eq, surf, jac_chunk_size=1), MagneticWell(eq), + AspectRatio(eq), ], deriv_mode="batched", use_jit=False, ) obj2 = ObjectiveFunction( [ - PlasmaVesselDistance(eq, surf), + PlasmaVesselDistance(eq, surf, jac_chunk_size=2), MagneticWell(eq), + AspectRatio(eq, jac_chunk_size=None), ], deriv_mode="blocked", + jac_chunk_size=10, use_jit=False, ) - obj3 = ObjectiveFunction( + with pytest.warns(DeprecationWarning, match="looped"): + obj3 = ObjectiveFunction( + [ + PlasmaVesselDistance(eq, surf), + MagneticWell(eq), + AspectRatio(eq), + ], + deriv_mode="looped", + use_jit=False, + ) + with pytest.raises(ValueError, match="jac_chunk_size"): + obj1.build() + with pytest.raises(ValueError, match="jac_chunk_size"): + obj2.build() + obj1 = ObjectiveFunction( [ PlasmaVesselDistance(eq, surf), MagneticWell(eq), + AspectRatio(eq), ], - deriv_mode="looped", + deriv_mode="batched", + use_jit=False, + ) + obj2 = ObjectiveFunction( + [ + PlasmaVesselDistance(eq, surf, jac_chunk_size=2), + MagneticWell(eq), + AspectRatio(eq, jac_chunk_size=None), + ], + 
deriv_mode="blocked", use_jit=False, ) - obj1.build() obj2.build() + # check that default size works for blocked + assert obj2.objectives[1]._jac_chunk_size is None + assert obj2.objectives[2]._jac_chunk_size is None + # hard to say what size auto will give, just check it is >0 + assert obj1._jac_chunk_size > 0 obj3.build() x = obj1.x(eq, surf) g1 = obj1.grad(x) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 24e4cd028..4f0cce0e0 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1136,15 +1136,16 @@ def test_proximal_jacobian(): deriv_mode="batched", use_jit=False, ) - obj2 = ObjectiveFunction( - ( - QuasisymmetryTripleProduct(eq2, deriv_mode="fwd"), - AspectRatio(eq2, deriv_mode="fwd"), - Volume(eq2, deriv_mode="fwd"), - ), - deriv_mode="looped", - use_jit=False, - ) + with pytest.warns(DeprecationWarning, match="looped"): + obj2 = ObjectiveFunction( + ( + QuasisymmetryTripleProduct(eq2, deriv_mode="fwd"), + AspectRatio(eq2, deriv_mode="fwd"), + Volume(eq2, deriv_mode="fwd"), + ), + deriv_mode="looped", + use_jit=False, + ) obj3 = ObjectiveFunction( ( QuasisymmetryTripleProduct(eq3, deriv_mode="fwd"), @@ -1245,9 +1246,10 @@ def test_LinearConstraint_jacobian(): obj1 = ObjectiveFunction( ForceBalance(eq1, deriv_mode="auto"), deriv_mode="batched", use_jit=False ) - obj2 = ObjectiveFunction( - ForceBalance(eq2, deriv_mode="fwd"), deriv_mode="looped", use_jit=False - ) + with pytest.warns(DeprecationWarning, match="looped"): + obj2 = ObjectiveFunction( + ForceBalance(eq2, deriv_mode="fwd"), deriv_mode="looped", use_jit=False + ) obj3 = ObjectiveFunction( ForceBalance(eq3, deriv_mode="rev"), deriv_mode="blocked", use_jit=False )