diff --git a/circuit_knitting/cutting/__init__.py b/circuit_knitting/cutting/__init__.py
index c05d275b3..e0c294b39 100644
--- a/circuit_knitting/cutting/__init__.py
+++ b/circuit_knitting/cutting/__init__.py
@@ -54,6 +54,7 @@
     :toctree: ../stubs/
     :nosignatures:
 
+    qpd.generate_qpd_weights
     qpd.generate_qpd_samples
     qpd.decompose_qpd_instructions
     qpd.supported_gates
diff --git a/circuit_knitting/cutting/cutting_evaluation.py b/circuit_knitting/cutting/cutting_evaluation.py
index bbb896101..4ea9bbac0 100644
--- a/circuit_knitting/cutting/cutting_evaluation.py
+++ b/circuit_knitting/cutting/cutting_evaluation.py
@@ -30,7 +30,7 @@
     QPDBasis,
     SingleQubitQPDGate,
     TwoQubitQPDGate,
-    generate_qpd_samples,
+    generate_qpd_weights,
     decompose_qpd_instructions,
     WeightType,
 )
@@ -259,7 +259,7 @@ def _generate_cutting_experiments(
         }
 
     # Sample the joint quasiprobability decomposition
-    random_samples = generate_qpd_samples(bases, num_samples=num_samples)
+    random_samples = generate_qpd_weights(bases, num_samples=num_samples)
 
     # Calculate terms in coefficient calculation
     kappa = np.prod([basis.kappa for basis in bases])
diff --git a/circuit_knitting/cutting/qpd/__init__.py b/circuit_knitting/cutting/qpd/__init__.py
index 646abc03a..b734ae205 100644
--- a/circuit_knitting/cutting/qpd/__init__.py
+++ b/circuit_knitting/cutting/qpd/__init__.py
@@ -13,6 +13,7 @@
 
 from .qpd_basis import QPDBasis
 from .qpd import (
+    generate_qpd_weights,
     generate_qpd_samples,
     decompose_qpd_instructions,
     WeightType,
@@ -28,6 +29,7 @@
 
 __all__ = [
     "qpdbasis_from_gate",
+    "generate_qpd_weights",
     "generate_qpd_samples",
     "decompose_qpd_instructions",
     "supported_gates",
diff --git a/circuit_knitting/cutting/qpd/qpd.py b/circuit_knitting/cutting/qpd/qpd.py
index 16d193328..8ec377b0f 100644
--- a/circuit_knitting/cutting/qpd/qpd.py
+++ b/circuit_knitting/cutting/qpd/qpd.py
@@ -14,11 +14,14 @@
 from __future__ import annotations
 
 from collections.abc import Sequence, Callable
+from collections import Counter
 from enum import Enum
 import itertools
+import logging
 import math
 
 import numpy as np
+import numpy.typing as npt
 from qiskit.circuit import (
     QuantumCircuit,
     Gate,
@@ -59,12 +62,21 @@
     iSwapGate,
     DCXGate,
 )
+from qiskit.utils import deprecate_func
 
 from .qpd_basis import QPDBasis
 from .instructions import BaseQPDGate, TwoQubitQPDGate, QPDMeasure
 from ...utils.iteration import unique_by_id, strict_zip
 
 
+logger = logging.getLogger(__name__)
+
+
+# math.sin(math.pi) is just above 1e-16, so at 1e-14, we are well above that.
+# Numbers like this can often come up in the QPD coefficients.
+_NONZERO_ATOL = 1e-14
+
+
 class WeightType(Enum):
     """Type of weight associated with a QPD sample."""
 
@@ -75,18 +87,217 @@ class WeightType(Enum):
     SAMPLED = 2
 
 
+def _min_filter_nonzero(vals: npt.NDArray[np.float64], *, atol=_NONZERO_ATOL):
+    return np.min(vals[np.logical_not(np.isclose(vals, 0, atol=atol))])
+
+
+def __update_running_product_after_increment(
+    running_product: list[float],
+    state: Sequence[int],
+    coeff_probabilities: Sequence[npt.NDArray[np.float64]],
+):
+    """
+    Update the ``running_product`` list after the ``state`` has been incremented.
+
+    This snippet is used twice in
+    :func:`_generate_exact_weights_and_conditional_probabilities_assume_sorted`;
+    hence, we write it only once here.
+    """
+    try:
+        prev = running_product[-2]
+    except IndexError:
+        prev = 1.0
+    running_product[-1] = prev * coeff_probabilities[len(state) - 1][state[-1]]
+
+
+def _generate_exact_weights_and_conditional_probabilities_assume_sorted(
+    coeff_probabilities: Sequence[npt.NDArray[np.float64]], threshold: float
+):
+    r"""
+    Determine all exact weights above ``threshold`` and the conditional probabilities necessary to sample efficiently from all other weights.
+
+    Each yielded element will be a 2-tuple, the first element of which will be
+    a ``state``, represented by a tuple of ``int``\ s.
+
+    If ``state`` contains the same number of elements as
+    ``coeff_probabilities``, then the second element will be a ``float``,
+    greater than or equal to ``threshold``, that contains the exact probability
+    of this ``state``.
+
+    If, on the other hand, ``state`` contains fewer elements than
+    ``coeff_probabilities``, then the second element is a 1D ``np.ndarray`` of
+    the conditional probabilities that can be used to sample the remaining
+    weights that have not been evaluated exactly (i.e., are below
+    ``threshold``).  These will all be normalized *except* the
+    top-level one, which is given by ``state == ()``.
+
+    If there are no exact weights below a given level, then the conditional
+    probabilities at that level are equivalent to the corresponding element of
+    `coeff_probabilities``, and this generator does not yield them.  Skipping
+    redundant conditional probabilities, like this, allows for efficient
+    sampling without traversing the entire tree, which is desirable since the
+    tree grows exponentially with the number of cut gates.  By contrast, the
+    performance with the provided strategy is no worse than linear in
+    ``threshold``, in both time and memory.
+
+    This function assumes each element of ``coeff_probabilities`` contains
+    non-negative numbers, ordered largest to smallest.  For a generalization
+    that allows ``coeff_probabilities`` to be provided in any order, see
+    :func:`_generate_exact_weights_and_conditional_probabilities`.
+    """
+    assert len(coeff_probabilities) > 0
+
+    next_pop = False  # Stores whether the next move is a pop or not
+    state = [0]
+    running_product = [coeff_probabilities[0][0]]
+    running_conditional_probabilities: list[npt.NDArray[np.float64]] = []
+    while True:
+        if next_pop:
+            # Pop
+            state.pop()
+            if len(state) + 1 == len(running_conditional_probabilities):
+                # There were some exact weights found below us, so we need to
+                # yield the conditional probabilities, unless all probabilities
+                # at this level are zero *and* it's not the top level.
+                current_condprobs = running_conditional_probabilities.pop()
+                current_condprobs[
+                    np.isclose(current_condprobs, 0, atol=_NONZERO_ATOL)
+                ] = 0.0
+                if not state:
+                    # Don't normalize the top-level conditional probabilities.
+                    yield (), current_condprobs
+                else:
+                    norm = np.sum(current_condprobs)
+                    if norm != 0:
+                        # Some, but not all, weights below us are exact.  If,
+                        # instead, _all_ weights had been exact, we could have
+                        # skipped this yield, as there is zero probability of
+                        # reaching this partial state when sampling.
+                        yield tuple(state), current_condprobs / norm
+                    # Update the factor one level up from the popped one.
+                    running_conditional_probabilities[-1][state[-1]] *= norm
+            if not state:
+                # We're all done
+                return
+            running_product.pop()
+            # Increment the state counter.
+            state[-1] += 1
+            if state[-1] != len(coeff_probabilities[len(state) - 1]):
+                # Increment successful (no overflow).  We don't need to pop again.
+                next_pop = False
+                __update_running_product_after_increment(
+                    running_product, state, coeff_probabilities
+                )
+        else:
+            if running_product[-1] < threshold:
+                next_pop = True
+            elif len(state) < len(coeff_probabilities):
+                # Append 0 to work toward a "full" `state`
+                running_product.append(
+                    running_product[-1] * coeff_probabilities[len(state)][0]
+                )
+                state.append(0)
+            else:
+                # `state` is full.  Yield first.
+                yield tuple(state), running_product[-1]
+                # Since we found something exact, we need to update running_conditional_probabilities.
+                while len(running_conditional_probabilities) < len(coeff_probabilities):
+                    running_conditional_probabilities.append(
+                        np.array(
+                            coeff_probabilities[len(running_conditional_probabilities)],
+                            dtype=float,
+                        )
+                    )
+                # It's exact, so we want no probability of sampling it going forward.
+                running_conditional_probabilities[-1][state[-1]] = 0
+                # Increment the state counter.
+                state[-1] += 1
+                if state[-1] == len(coeff_probabilities[-1]):
+                    # The state counter has overflowed, so our next move should be a
+                    # pop.
+                    next_pop = True
+                else:
+                    __update_running_product_after_increment(
+                        running_product, state, coeff_probabilities
+                    )
+
+
+def _invert_permutation(p):
+    # https://stackoverflow.com/questions/11649577/how-to-invert-a-permutation-array-in-numpy
+    s = np.empty(p.size, p.dtype)
+    s[p] = np.arange(p.size)
+    return s
+
+
+def _generate_exact_weights_and_conditional_probabilities(
+    coeff_probabilities: Sequence[npt.NDArray[np.float64]], threshold: float
+):
+    """
+    Generate exact weights and conditional probabilities.
+
+    This is identical in behavior to
+    :func:`_generate_exact_weights_and_conditional_probabilities_assume_sorted`,
+    except it makes no assumption on the order of the coefficients.
+    """
+    permutations = [np.argsort(cp)[::-1] for cp in coeff_probabilities]
+    sorted_coeff_probabilities = [
+        cp[permutation] for cp, permutation in zip(coeff_probabilities, permutations)
+    ]
+    ipermutations = [_invert_permutation(p) for p in permutations]
+    for (
+        coeff_indices,
+        probability,
+    ) in _generate_exact_weights_and_conditional_probabilities_assume_sorted(
+        sorted_coeff_probabilities, threshold
+    ):
+        orig_coeff_indices = tuple(
+            perm[idx] for perm, idx in zip(permutations, coeff_indices)
+        )
+        if len(coeff_indices) != len(sorted_coeff_probabilities):
+            # In this case, `probability` is actually a vector of conditional
+            # probabilities, so apply the inverse permutation.
+            probability = probability[ipermutations[len(coeff_indices)]]
+        yield orig_coeff_indices, probability
+
+
+@deprecate_func(
+    since="0.3.0",
+    package_name="circuit-knitting-toolbox",
+    removal_timeline="no earlier than v0.4.0",
+    additional_msg=(
+        "This function has been renamed to "
+        "``circuit_knitting.cutting.qpd.generate_qpd_weights()``."
+    ),
+)
 def generate_qpd_samples(
     qpd_bases: Sequence[QPDBasis], num_samples: float = 1000
-) -> dict[tuple[int, ...], tuple[float, WeightType]]:
+) -> dict[tuple[int, ...], tuple[float, WeightType]]:  # pragma: no cover
     """
     Generate random quasiprobability decompositions.
 
+    Deprecated since CKT 0.3.0.  This function has been renamed to
+    :func:`.generate_qpd_weights`.
+    """
+    return generate_qpd_weights(qpd_bases, num_samples)
+
+
+def generate_qpd_weights(
+    qpd_bases: Sequence[QPDBasis], num_samples: float = 1000
+) -> dict[tuple[int, ...], tuple[float, WeightType]]:
+    """
+    Generate weights from the joint quasiprobability distribution.
+
+    Each weight whose absolute value is above a threshold of ``1 /
+    num_samples`` will be evaluated exactly. The remaining weights -- those in
+    the tail of the distribution -- will then be sampled from, resulting in at
+    most ``num_samples`` unique weights.
+
     Args:
-        qpd_bases: The :class:`QPDBasis` objects from which to sample
-        num_samples: Number of random samples to generate
+        qpd_bases: The :class:`QPDBasis` objects from which to generate weights
+        num_samples: Controls the number of weights to generate
 
     Returns:
-        A mapping from a given decomposition to its sampled weight.
+        A mapping from a given decomposition to its weight.
         Keys are tuples of indices -- one index per input :class:`QPDBasis`. The indices
         correspond to a specific decomposition mapping in the basis.
 
@@ -94,47 +305,188 @@ def generate_qpd_samples(
         weight of the contribution.  The second element is the :class:`WeightType`,
         either ``EXACT`` or ``SAMPLED``.
     """
+    independent_probabilities = [np.asarray(basis.probabilities) for basis in qpd_bases]
+    # In Python 3.7 and higher, dicts are guaranteed to remember their
+    # insertion order.  For user convenience, we sort by exact weights first,
+    # then sampled weights.  Within each, values are sorted largest to
+    # smallest.
+    lst = sorted(
+        _generate_qpd_weights(independent_probabilities, num_samples).items(),
+        key=lambda x: ((v := x[1])[1].value, -v[0]),
+    )
+    return dict(lst)
+
+
+def _generate_qpd_weights(
+    independent_probabilities: Sequence[npt.NDArray[np.float64]],
+    num_samples: float,
+    *,
+    _samples_multiplier: int = 1,
+) -> dict[tuple[int, ...], tuple[float, WeightType]]:
     if not num_samples >= 1:
         raise ValueError("num_samples must be at least 1.")
 
-    # Determine if the smallest probability is above the threshold implied by
+    retval: dict[tuple[int, ...], tuple[float, WeightType]] = {}
+
+    threshold = 1 / num_samples
+
+    # Determine if the smallest *nonzero* probability is above the threshold implied by
     # num_samples.  If so, then we can evaluate all weights exactly.
-    probabilities_by_basis = [basis.probabilities for basis in qpd_bases]
-    smallest_probability = np.prod([min(probs) for probs in probabilities_by_basis])
-    if smallest_probability >= 1 / num_samples:
+    smallest_probability = np.prod(
+        [_min_filter_nonzero(probs) for probs in independent_probabilities]
+    )
+    if smallest_probability >= threshold:
+        # All weights exactly
+        logger.info("All exact weights")
         multiplier = num_samples if math.isfinite(num_samples) else 1.0
-        retval: dict[tuple[int, ...], tuple[float, WeightType]] = {}
         for map_ids in itertools.product(
-            *[range(len(probs)) for probs in probabilities_by_basis]
+            *[range(len(probs)) for probs in independent_probabilities]
         ):
             probability = np.prod(
-                [probs[i] for i, probs in strict_zip(map_ids, probabilities_by_basis)]
+                [
+                    probs[i]
+                    for i, probs in strict_zip(map_ids, independent_probabilities)
+                ]
             )
+            if probability < _NONZERO_ATOL:
+                continue
             retval[map_ids] = (multiplier * probability, WeightType.EXACT)
         return retval
 
-    # Loop through each gate and sample from its distribution num_samples times
-    samples_by_decomp = []
-    sample_multiplier = num_samples / math.ceil(num_samples)
-    for basis in qpd_bases:
-        samples_by_decomp.append(
-            np.random.choice(
-                range(len(basis.probabilities)),
-                math.ceil(num_samples),
-                p=basis.probabilities,
-            )
-        )
+    conditional_probabilities: dict[tuple[int, ...], npt.NDArray[np.float64]] = {}
+    weight_to_sample = 1.0
+
+    largest_probability = np.prod(
+        [np.max(probs) for probs in independent_probabilities]
+    )
+    if not largest_probability >= threshold:
+        logger.info("No exact weights")
+    else:
+        logger.info("Some exact weights")
+        for (
+            map_ids,
+            probability,
+        ) in _generate_exact_weights_and_conditional_probabilities(
+            independent_probabilities, threshold
+        ):
+            # As described in the docstring to
+            # _generate_exact_weights_and_conditional_probabilities_assume_sorted,
+            # there are two possible pieces of information that might be
+            # yielded by the generator.  The following branch distinguishes
+            # between them.
+            if len(map_ids) == len(independent_probabilities):
+                # The generator produced a state together with its exact probability.
+                weight = probability * num_samples
+                retval[map_ids] = (weight, WeightType.EXACT)
+            else:
+                # The generator produced a partial state along with a vector of
+                # conditional probabilities.
+                conditional_probabilities[map_ids] = probability
+                if map_ids == ():
+                    weight_to_sample = np.sum(probability)
+                    conditional_probabilities[map_ids] /= weight_to_sample
+
+    # Loop through each gate and sample from the remainder of the distribution.
+
+    # Start by rescaling.
+    weight_to_sample *= num_samples
+    # The following variable, `samples_needed`, must be integer and at least 1.
+    # `_samples_multiplier` will typically be 1, but we set it higher in
+    # testing to collect additional statistics, faster.
+    samples_needed = math.ceil(weight_to_sample) * _samples_multiplier
+    # At the time of writing, the below assert should never fail.  But if
+    # future code changes result in inputs where it _may_ fail, then the only
+    # thing that should be needed if this is reached is to return `retval` in
+    # this case, since presumably it must contain all weights as exact weights.
+    assert samples_needed >= 1
+    single_sample_weight = weight_to_sample / samples_needed
+
+    # Figure out if we've reached the special case where everything except
+    # *one* weight has been calculated exactly, so there's only one thing left
+    # to sample.  If that's the case, then we can calculate that one remaining
+    # weight exactly as well and skip sampling.
+    if conditional_probabilities:
+        running_state: tuple[int, ...] = ()
+        while len(running_state) < len(independent_probabilities):
+            # If it's missing from `conditional_probabilities`, that just means
+            # to use the corresponding entry in `independent_probabilities`.
+            try:
+                probs = conditional_probabilities[running_state]
+            except KeyError:
+                probs = independent_probabilities[len(running_state)]
+            x = np.flatnonzero(probs)
+            assert len(x) != 0
+            if len(x) > 1:
+                break
+            running_state += (x[0],)
+        else:
+            assert running_state not in retval
+            retval[running_state] = (weight_to_sample, WeightType.EXACT)
+            return retval
 
-    # Form the joint samples, collecting them into a dict with counts for each
+    # Form the joint samples, collecting them into a dict with counts for each.
     random_samples: dict[tuple[int, ...], int] = {}
-    for decomp_ids in zip(*samples_by_decomp):
-        # Increment the counter for this basis selection
-        random_samples[decomp_ids] = random_samples.setdefault(decomp_ids, 0) + 1
-
-    return {
-        k: (v * sample_multiplier, WeightType.SAMPLED)
-        for k, v in random_samples.items()
-    }
+    _populate_samples(
+        random_samples,
+        samples_needed,
+        independent_probabilities,
+        conditional_probabilities,
+    )
+    # Insert the samples into the dict we are about to return.
+    for outcome, count in random_samples.items():
+        assert outcome not in retval
+        retval[outcome] = (count * single_sample_weight, WeightType.SAMPLED)
+
+    return retval
+
+
+def _populate_samples(
+    random_samples: dict[tuple[int, ...], int],
+    num_desired: int,
+    independent_probabilities: Sequence,
+    conditional_probabilities: dict[tuple[int, ...], npt.NDArray[np.float64]],
+    running_state: tuple[int, ...] = (),
+) -> None:
+    """
+    Generate random samples from the conditional probabilitity distributions.
+
+    Items get populated into the ``random_samples`` dict, rather than returned.
+
+    This function is designed to call itself recursively.  The number of
+    elements in ``running_state`` will match the recursion depth.
+    """
+    if running_state not in conditional_probabilities:
+        # Everything below us is sampled, so we can sample directly from the
+        # remaining independent probability distributions.
+        samples_by_decomp = []
+        for probs in independent_probabilities[len(running_state) :]:
+            samples_by_decomp.append(
+                np.random.choice(range(len(probs)), num_desired, p=probs)
+            )
+        for outcome, count in Counter(zip(*samples_by_decomp)).items():
+            assert (running_state + outcome) not in random_samples
+            random_samples[running_state + outcome] = count
+        return
+
+    # There are some exact weight(s) below us, so we must consider the
+    # conditional probabilities at the current level.
+    probs = conditional_probabilities[running_state]
+    current_outcomes = np.random.choice(range(len(probs)), num_desired, p=probs)
+    for current_outcome, count in Counter(current_outcomes).items():
+        outcome = running_state + (current_outcome,)
+        if len(outcome) == len(independent_probabilities):
+            # It's a full one
+            assert outcome not in random_samples
+            random_samples[outcome] = count
+        else:
+            # Recurse
+            _populate_samples(
+                random_samples,
+                count,
+                independent_probabilities,
+                conditional_probabilities,
+                outcome,
+            )
 
 
 def decompose_qpd_instructions(
diff --git a/docs/circuit_cutting/how-tos/how_to_generate_exact_sampling_coefficients.ipynb b/docs/circuit_cutting/how-tos/how_to_generate_exact_sampling_coefficients.ipynb
index 747250aeb..fc5d10744 100644
--- a/docs/circuit_cutting/how-tos/how_to_generate_exact_sampling_coefficients.ipynb
+++ b/docs/circuit_cutting/how-tos/how_to_generate_exact_sampling_coefficients.ipynb
@@ -7,7 +7,9 @@
    "source": [
     "## How to generate exact sampling coefficients\n",
     "\n",
-    "This how-to guide is intended to show users how they can generate exact sampling coefficients to be used in reconstructing the simulated expectation value of the original circuit."
+    "This how-to guide is intended to show users how they can generate exact sampling coefficients to be used in reconstructing the simulated expectation value of the original circuit.\n",
+    "\n",
+    "First, we set up a simple cutting problem following the [first tutorial](../tutorials/01_gate_cutting_to_reduce_circuit_width.ipynb)."
    ]
   },
   {
@@ -17,6 +19,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
     "from qiskit.circuit.library import EfficientSU2\n",
     "from qiskit.quantum_info import PauliList\n",
     "from qiskit_aer.primitives import Sampler\n",
@@ -107,21 +110,129 @@
     "subcircuits[\"B\"].draw(\"mpl\", scale=0.6)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7d1f2af8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sampler = Sampler(run_options={\"shots\": 1})"
+   ]
+  },
   {
    "cell_type": "markdown",
-   "id": "5770cf75",
+   "id": "fce55187",
    "metadata": {},
    "source": [
-    "#### Demonstrate how to find the minimum number of samples needed to retrieve exact weights for 2 CNOT cuts\n",
+    "### Demonstrate how to obtain all weights exactly\n",
     "\n",
-    "In order to retrieve exact coefficients and use them during reconstruction, the number of samples taken must be sufficient to cover the entire distribution. More specifically, if the smallest sample probability in the joint quasi-probability distribution is above 1 / `num_samples`, exact weights will be used.  Otherwise, the weights will be sampled, resulting in at most `num_samples` unique samples.\n",
+    "If you wish to calculate all weights exactly, no matter how small, you can achieve this by passing infinity (`np.inf`) to `num_samples`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8c56282f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[(0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (-0.24999999999999992, <WeightType.EXACT: 1>),\n",
+       " (0.24999999999999992, <WeightType.EXACT: 1>)]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "quasi_dists, coefficients = execute_experiments(\n",
+    "    circuits=subcircuits,\n",
+    "    subobservables=subobservables,\n",
+    "    num_samples=np.inf,\n",
+    "    samplers=sampler,\n",
+    ")\n",
+    "coefficients"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5fc6579",
+   "metadata": {},
+   "source": [
+    "### Demonstrate how to find the minimum `num_samples` needed to retrieve all exact weights for 2 CNOT cuts\n",
     "\n",
-    "Here, we are cutting two CNOT gates. As shown below, the probability of any given mapping in a CNOT decomposition is 1/6, so the probability of any given mapping in the joint distribution combining the two cut CNOT gates is (1/6)<sup>2</sup>. Therefore, we need to take at least 6^<sup>2</sup> samples in order to retrieve the exact weights from `execute_experiments`."
+    "When `num_samples` is set to a finite number, each weight whose absolute value is above a threshold of 1 / `num_samples` will be evaluated exactly.  The remaining weights -- those in the tail of the distribution -- will then be sampled from, resulting in at most `num_samples` unique weights.\n",
+    "\n",
+    "In the case of a CNOT gate -- or any gate equivalent to it up to single-qubit unitaries -- each of the six weights of the quasi-probability decomposition have the same magnitude, so each gets sampled with a probability of $1/6$:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
+   "id": "78539fcc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mapping probabilities for a CNOT decomposition: [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"Mapping probabilities for a CNOT decomposition: {bases[0].probabilities}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5770cf75",
+   "metadata": {},
+   "source": [
+    "In this example, we have cut two CNOT gates. Given that the probability of any given mapping in a CNOT decomposition is $1/6$, the probability of any given mapping in the _joint_ distribution combining the two cut CNOT gates is $(1/6)^2$. Therefore, we need to take at least $6^2$ weights in order to retrieve all exact weights from `execute_experiments`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "id": "f07a6cc3",
    "metadata": {},
    "outputs": [
@@ -129,7 +240,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mapping probabilities for a CNOT decomposition: [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667]\n",
       "Number of samples needed to retrieve exact weights: 36.0\n"
      ]
     }
@@ -140,11 +250,16 @@
     "\n",
     "qpd_basis_cx = QPDBasis.from_gate(CXGate())\n",
     "\n",
+    "\n",
+    "def _min_nonzero(seq, /):\n",
+    "    \"\"\"Return the minimum value in a sequence, ignoring values near zero.\"\"\"\n",
+    "    return min(x for x in seq if not np.isclose(x, 0))\n",
+    "\n",
+    "\n",
     "num_cx_cuts = 2\n",
     "\n",
-    "print(f\"Mapping probabilities for a CNOT decomposition: {qpd_basis_cx.probabilities}\")\n",
     "print(\n",
-    "    f\"Number of samples needed to retrieve exact weights: {1 / min(qpd_basis_cx.probabilities)**num_cx_cuts}\"\n",
+    "    f\"Number of samples needed to retrieve exact weights: {1 / _min_nonzero(qpd_basis_cx.probabilities)**num_cx_cuts}\"\n",
     ")"
    ]
   },
@@ -160,7 +275,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
    "id": "43d32869",
    "metadata": {},
    "outputs": [
@@ -205,14 +320,12 @@
        " (0.25, <WeightType.EXACT: 1>)]"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sampler = Sampler(run_options={\"shots\": 1})\n",
-    "\n",
     "quasi_dists, coefficients = execute_experiments(\n",
     "    circuits=subcircuits,\n",
     "    subobservables=subobservables,\n",
@@ -239,7 +352,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/docs/circuit_cutting/tutorials/01_gate_cutting_to_reduce_circuit_width.ipynb b/docs/circuit_cutting/tutorials/01_gate_cutting_to_reduce_circuit_width.ipynb
index b667736b2..c274cf8a8 100644
--- a/docs/circuit_cutting/tutorials/01_gate_cutting_to_reduce_circuit_width.ipynb
+++ b/docs/circuit_cutting/tutorials/01_gate_cutting_to_reduce_circuit_width.ipynb
@@ -237,9 +237,9 @@
     "\n",
     "To simulate the expectation value of the full-sized circuit, many subexperiments are generated from the decomposed gates' joint quasiprobability distribution and then executed on one or more backends.\n",
     "\n",
-    "Much of the circuit cutting literature describes a process where we sample from the distribution, take a single shot, sample from the distribution again and repeat; however, this is not feasible in practice. The number of shots needed grows exponentially with the number of cuts, and single shot experiments via Qiskit Runtime quickly become untenable. Instead, we take an equivalent number of shots for each sampled subexperiment, and send them to the backend(s) in batches. We just need to ensure the number of shots we take is appropriate for the heaviest-weighted samples, and thus, appropriate for all samples.\n",
+    "The number of weights taken from the distribution is controlled by `num_samples`. Each weight whose absolute value is above a threshold of 1 / `num_samples` will be evaluated exactly.  The remaining low-probability elements -- those in the tail of the distribution -- will then be sampled from, resulting in at most `num_samples` unique weights.\n",
     "\n",
-    "The number of samples taken from the distribution is controlled by `num_samples`. If the smallest sample probability in the joint quasi-probability distribution is above 1 / `num_samples`, exact weights will be used.  Otherwise, the weights will be sampled, resulting in at most `num_samples` unique samples.  In the future, this function will be changed to return a mixture of exact and sampled weights, so that the largest weights will be calculated exactly, while weights in the tail of the distribution will be sampled."
+    "Much of the circuit cutting literature describes a process where we sample from the distribution, take a single shot, then sample from the distribution again and repeat; however, this is not feasible in practice. The total number of shots needed grows exponentially with the number of cuts, and taking single shot experiments via Qiskit Runtime quickly becomes untenable. Instead, we take an equivalent number of shots for each considered subexperiment and send them to the backend(s) in batches. During reconstruction, each subexperiment contributes to the final result with proportion equal to its weight.  We just need to ensure the number of shots we take is appropriate for the heaviest weights, and thus, appropriate for all weights."
    ]
   },
   {
@@ -367,7 +367,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/docs/circuit_cutting/tutorials/02_gate_cutting_to_reduce_circuit_depth.ipynb b/docs/circuit_cutting/tutorials/02_gate_cutting_to_reduce_circuit_depth.ipynb
index c598179c9..9fe5f41f2 100644
--- a/docs/circuit_cutting/tutorials/02_gate_cutting_to_reduce_circuit_depth.ipynb
+++ b/docs/circuit_cutting/tutorials/02_gate_cutting_to_reduce_circuit_depth.ipynb
@@ -291,9 +291,11 @@
     "\n",
     "`execute_experiments` accepts a circuit containing `TwoQubitQPDGate`s as its `circuits` argument. If a single circuit is passed in this way, a `PauliList` is the expected type of the `subobservables` argument. \n",
     "\n",
-    "Much of the circuit cutting literature describes a process where we sample from the distribution, take a single shot, sample from the distribution again and repeat; however, this is not feasible in practice. The number of shots needed grows exponentially with the number of cuts, and single shot experiments via Qiskit Runtime quickly become untenable. Instead, we take an equivalent number of shots for each sampled subexperiment, and send them to the backend(s) in batches. We just need to ensure the number of shots we take is appropriate for the heaviest-weighted samples, and thus, appropriate for all samples.\n",
+    "To simulate the expectation value of the full-sized circuit, many subexperiments are generated from the decomposed gates' joint quasiprobability distribution and then executed on one or more backends.\n",
     "\n",
-    "To simulate the expectation value of the full-sized circuit, many subexperiments are generated from the decomposed gates' joint quasiprobability distribution and then executed on one or more backends. The number of samples taken from the distribution is controlled by `num_samples`. If the smallest sample probability in the joint quasi-probability distribution is above 1 / `num_samples`, exact weights will be used.  Otherwise, the weights will be sampled, resulting in at most `num_samples` unique samples.  In the future, this function will be changed to return a mixture of exact and sampled weights, so that the largest weights will be calculated exactly, while weights in the tail of the distribution will be sampled."
+    "The number of weights taken from the distribution is controlled by `num_samples`. Each weight whose absolute value is above a threshold of 1 / `num_samples` will be evaluated exactly.  The remaining low-probability elements -- those in the tail of the distribution -- will then be sampled from, resulting in at most `num_samples` unique weights.\n",
+    "\n",
+    "Much of the circuit cutting literature describes a process where we sample from the distribution, take a single shot, then sample from the distribution again and repeat; however, this is not feasible in practice. The total number of shots needed grows exponentially with the number of cuts, and taking single shot experiments via Qiskit Runtime quickly becomes untenable. Instead, we take an equivalent number of shots for each considered subexperiment and send them to the backend(s) in batches. During reconstruction, each subexperiment contributes to the final result with proportion equal to its weight.  We just need to ensure the number of shots we take is appropriate for the heaviest weights, and thus, appropriate for all weights."
    ]
   },
   {
@@ -419,7 +421,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/releasenotes/notes/generate-qpd-weights-mixture-79c1c420105c8564.yaml b/releasenotes/notes/generate-qpd-weights-mixture-79c1c420105c8564.yaml
new file mode 100644
index 000000000..ed1276639
--- /dev/null
+++ b/releasenotes/notes/generate-qpd-weights-mixture-79c1c420105c8564.yaml
@@ -0,0 +1,27 @@
+---
+features:
+  - |
+    :func:`~circuit_knitting.cutting.qpd.generate_qpd_weights` now
+    returns a mixture of exact and sampled weights when appropriate.
+    Specifically, it exactly evaluates all weights greater than or
+    equal to ``1 / num_samples`` and samples from the remaining
+    weights (ones which are below this threshold).  Previously, this
+    function would only return exact weights if *all* weights were
+    greater than or equal to ``1 / num_samples``; otherwise, all
+    weights were sampled.  The new behavior is expected to improve
+    performance on non-uniform quasi-probability decompositions,
+    e.g. for cut instantiations of
+    :class:`~qiskit.circuit.library.RXXGate`,
+    :class:`~qiskit.circuit.library.RYYGate`,
+    :class:`~qiskit.circuit.library.RZZGate`,
+    :class:`~qiskit.circuit.library.CRXGate`,
+    :class:`~qiskit.circuit.library.CRYGate`, and
+    :class:`~qiskit.circuit.library.CRZGate` away from
+    :math:`\theta=\pi/2`.
+deprecations:
+  - |
+    :func:`~circuit_knitting.cutting.qpd.generate_qpd_samples` has
+    been renamed to
+    :func:`~circuit_knitting.cutting.qpd.generate_qpd_weights`.  The
+    original name will be removed no sooner than version 0.4 of the
+    Circuit Knitting Toolbox.
diff --git a/test/cutting/qpd/test_qpd.py b/test/cutting/qpd/test_qpd.py
index cd5537dce..8b2e86883 100644
--- a/test/cutting/qpd/test_qpd.py
+++ b/test/cutting/qpd/test_qpd.py
@@ -13,9 +13,12 @@
 
 import unittest
 import math
+from collections import Counter
+import itertools
 
 import pytest
 import numpy as np
+import numpy.typing as npt
 from ddt import ddt, data, unpack
 from qiskit.circuit import CircuitInstruction
 from qiskit.circuit.library import (
@@ -35,10 +38,14 @@
     QPDBasis,
     SingleQubitQPDGate,
     TwoQubitQPDGate,
-    generate_qpd_samples,
+    generate_qpd_weights,
 )
 from circuit_knitting.cutting.qpd.qpd import *
-from circuit_knitting.cutting.qpd.qpd import _nonlocal_qpd_basis_from_u
+from circuit_knitting.cutting.qpd.qpd import (
+    _generate_qpd_weights,
+    _generate_exact_weights_and_conditional_probabilities,
+    _nonlocal_qpd_basis_from_u,
+)
 
 
 @ddt
@@ -70,41 +77,41 @@ def setUp(self):
         self.qpd_gate2 = qpd_gate2
         self.qpd_circuit = qpd_circuit
 
-    def test_generate_qpd_samples(self):
+    def test_generate_qpd_weights(self):
         with self.subTest("Negative number of samples"):
             with pytest.raises(ValueError) as e_info:
-                generate_qpd_samples([], -100)
+                generate_qpd_weights([], -100)
             assert e_info.value.args[0] == "num_samples must be at least 1."
         with self.subTest("num_samples == NaN"):
             with pytest.raises(ValueError) as e_info:
-                generate_qpd_samples([], math.nan)
+                generate_qpd_weights([], math.nan)
             assert e_info.value.args[0] == "num_samples must be at least 1."
         with self.subTest("Zero samples requested"):
             with pytest.raises(ValueError) as e_info:
-                generate_qpd_samples([], 0)
+                generate_qpd_weights([], 0)
             assert e_info.value.args[0] == "num_samples must be at least 1."
         with self.subTest("Empty case"):
             empty_samples = {(): (1000, WeightType.EXACT)}
-            samples = generate_qpd_samples([])
+            samples = generate_qpd_weights([])
             self.assertEqual(samples, empty_samples)
         with self.subTest("HWEA 100 samples"):
             basis_ids = [9, 20]
             bases = [self.qpd_circuit.data[i].operation.basis for i in basis_ids]
-            samples = generate_qpd_samples(bases, num_samples=100)
-            self.assertEqual(100, sum(w for w, t in samples.values()))
+            samples = generate_qpd_weights(bases, num_samples=100)
+            assert sum(w for w, t in samples.values()) == pytest.approx(100)
             for decomp_ids in samples.keys():
                 self.assertTrue(0 <= decomp_ids[0] < len(self.qpd_gate1.basis.maps))
                 self.assertTrue(0 <= decomp_ids[1] < len(self.qpd_gate2.basis.maps))
         with self.subTest("HWEA 100.5 samples"):
             basis_ids = [9, 20]
             bases = [self.qpd_circuit.data[i].operation.basis for i in basis_ids]
-            samples = generate_qpd_samples(bases, num_samples=100.5)
+            samples = generate_qpd_weights(bases, num_samples=100.5)
             assert sum(w for w, t in samples.values()) == pytest.approx(100.5)
         with self.subTest("HWEA exact weights"):
             # Do the same thing with num_samples above the threshold for exact weights
             basis_ids = [9, 20]
             bases = [self.qpd_circuit.data[i].operation.basis for i in basis_ids]
-            samples = generate_qpd_samples(bases, num_samples=1000)
+            samples = generate_qpd_weights(bases, num_samples=1000)
             assert sum(w for w, t in samples.values()) == pytest.approx(1000)
             assert all(t == WeightType.EXACT for w, t in samples.values())
             for decomp_ids in samples.keys():
@@ -113,7 +120,7 @@ def test_generate_qpd_samples(self):
         with self.subTest("HWEA exact weights via 'infinite' num_samples"):
             basis_ids = [9, 20]
             bases = [self.qpd_circuit.data[i].operation.basis for i in basis_ids]
-            samples = generate_qpd_samples(bases, num_samples=math.inf)
+            samples = generate_qpd_weights(bases, num_samples=math.inf)
             assert sum(w for w, t in samples.values()) == pytest.approx(1)
             assert all(t == WeightType.EXACT for w, t in samples.values())
 
@@ -293,6 +300,128 @@ def test_qpdbasis_from_gate_unique_maps(
         assert len(unique_by_eq(a for (a, b) in relevant_maps)) == q0_num_unique
         assert len(unique_by_eq(b for (a, b) in relevant_maps)) == q1_num_unique
 
+    @data(
+        ([RZZGate(np.pi)], 1e4, 1, 0),
+        ([RZZGate(np.pi + 0.02)], 200, 6, 0),
+        ([RZZGate(np.pi + 0.02)], 100, 1),
+        ([RXXGate(0.1), RXXGate(0.1)], 100, 9),
+        ([CXGate(), CXGate()], 1e4, 36, 0),
+        ([CXGate(), CXGate()], 30, 0),
+        # The following two check either side of the exact/sampled threshold.
+        ([CXGate()], 6, 6, 0),
+        ([CXGate()], np.nextafter(6, -math.inf), 0),
+        # The following makes sure memory does not blow up with many cuts.
+        ([RXXGate(0.1)] * 16, 10000, 2001),
+    )
+    @unpack
+    def test_generate_qpd_weights_from_gates(
+        self, gates, num_samples, expected_exact, expected_sampled=None
+    ):
+        bases = [QPDBasis.from_gate(gate) for gate in gates]
+        samples = generate_qpd_weights(bases, num_samples)
+
+        counts = Counter(weight_type for _, weight_type in samples.values())
+        assert counts[WeightType.EXACT] == expected_exact
+        if expected_sampled is not None:
+            assert counts[WeightType.SAMPLED] == expected_sampled
+
+        total_weight = sum(weight for weight, _ in samples.values())
+        assert total_weight == pytest.approx(
+            num_samples if math.isfinite(num_samples) else 1
+        )
+
+        # Test that the dictionary is actually in the expected order: exact
+        # weights first, and largest to smallest within each type of weight.
+        last_type = WeightType.EXACT
+        upper_bound = math.inf
+        for weight, weight_type in samples.values():
+            if last_type != weight_type:
+                # We allow one transition, from exact weights to sampled.
+                assert weight_type == WeightType.SAMPLED
+                last_type = weight_type
+                upper_bound = math.inf
+            assert weight <= upper_bound
+            upper_bound = weight
+
+        # All tests that follow require time & memory that scales exponentially
+        # with number of gates cut, so skip them when the number is too high.
+        if len(gates) > 3:
+            return
+
+        # Test conditional probabilities from
+        # _generate_exact_weights_and_conditional_probabilities
+        independent_probabilities = [basis.probabilities for basis in bases]
+        probs: dict[tuple[int, ...], float] = {}
+        conditional_probabilities: dict[tuple[int, ...], npt.NDArray[np.float64]] = {}
+        for (
+            map_ids,
+            probability,
+        ) in _generate_exact_weights_and_conditional_probabilities(
+            independent_probabilities, 1 / num_samples
+        ):
+            if len(map_ids) == len(bases):
+                assert map_ids not in probs
+                probs[map_ids] = probability
+            else:
+                conditional_probabilities[map_ids] = probability
+        stack = [(1.0, ())]
+        while stack:
+            running_prob, map_ids_partial = stack.pop()
+            # If it's missing from `conditional_probabilities`, that just means
+            # to use the corresponding entry in `independent_probabilities`.
+            try:
+                vec = conditional_probabilities[map_ids_partial]
+            except KeyError:
+                vec = independent_probabilities[len(map_ids_partial)]
+            for i, prob in enumerate(vec):
+                pp = running_prob * prob
+                if pp == 0:
+                    continue
+                map_ids = map_ids_partial + (i,)
+                if len(map_ids) == len(bases):
+                    assert map_ids not in probs
+                    probs[map_ids] = pp
+                else:
+                    stack.append((pp, map_ids))
+        # Now, systematically generate each exact weight, and compare with what
+        # we generated above.
+        for map_ids in itertools.product(
+            *[range(len(probs)) for probs in independent_probabilities]
+        ):
+            exact = np.prod(
+                [
+                    probs[i]
+                    for i, probs in strict_zip(map_ids, independent_probabilities)
+                ]
+            )
+            assert probs.get(map_ids, 0.0) == pytest.approx(exact, abs=1e-14)
+
+    def test_statistics_of_generate_qpd_weights(self):
+        # Values inspired by the R[XX,YY,ZZ]Gate rotations
+        def from_theta(theta):
+            v = np.array(
+                [
+                    np.cos(theta) ** 2,
+                    np.sin(theta) ** 2,
+                    4 * np.cos(theta) * np.sin(theta),
+                ]
+            )
+            return v / np.sum(v)
+
+        probs = [from_theta(0.1), from_theta(0.2)]
+        num_samples = 200
+        weights = _generate_qpd_weights(probs, num_samples, _samples_multiplier=10000)
+        for map_ids in [(0, 0), (0, 2), (0, 1), (2, 0), (2, 2), (2, 1)]:
+            assert weights[map_ids][0] / num_samples == pytest.approx(
+                probs[0][map_ids[0]] * probs[1][map_ids[1]]
+            )
+            assert weights[map_ids][1] == WeightType.EXACT
+        for map_ids in [(1, 2), (1, 0), (1, 1)]:
+            assert weights[map_ids][0] / num_samples == pytest.approx(
+                probs[0][map_ids[0]] * probs[1][map_ids[1]], rel=0.5
+            )
+            assert weights[map_ids][1] == WeightType.SAMPLED
+
     def test_supported_gates(self):
         gates = supported_gates()
         self.assertEqual(