From e2fec6a5a2614e99b4904751cb5c40c7ace4198f Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Mon, 7 Oct 2024 17:48:14 +0200 Subject: [PATCH 1/9] Auto-fix rules D202, D205, D209, D403 and D406 --- pymc/backends/base.py | 3 ++- pymc/backends/ndarray.py | 2 +- pymc/data.py | 1 - pymc/distributions/custom.py | 1 - pymc/distributions/dist_math.py | 6 ++++-- pymc/distributions/distribution.py | 2 -- pymc/distributions/multivariate.py | 7 ------- pymc/distributions/shape_utils.py | 2 -- pymc/gp/cov.py | 1 - pymc/gp/gp.py | 6 ------ pymc/gp/hsgp_approx.py | 1 - pymc/initial_point.py | 1 - pymc/logprob/basic.py | 2 -- pymc/logprob/checks.py | 2 -- pymc/logprob/cumsum.py | 1 - pymc/logprob/mixture.py | 1 - pymc/logprob/rewriting.py | 2 -- pymc/logprob/scan.py | 2 -- pymc/logprob/transform_value.py | 2 -- pymc/logprob/transforms.py | 4 ++-- pymc/logprob/utils.py | 2 -- pymc/model/core.py | 16 ++++++++++------ pymc/model/fgraph.py | 1 - pymc/model/transform/basic.py | 1 - pymc/ode/utils.py | 2 -- pymc/printing.py | 11 ++++++----- pymc/pytensorf.py | 4 ++-- pymc/sampling/forward.py | 1 - pymc/sampling/jax.py | 4 ---- pymc/sampling/mcmc.py | 4 ++-- pymc/smc/kernels.py | 2 -- pymc/smc/sampling.py | 1 - pymc/stats/log_density.py | 1 - pymc/step_methods/compound.py | 3 ++- pymc/step_methods/hmc/nuts.py | 1 - pymc/step_methods/metropolis.py | 1 - pymc/testing.py | 3 +-- pymc/tuning/scaling.py | 1 - pymc/variational/minibatch_rv.py | 1 - pymc/variational/opvi.py | 11 ++++++----- 40 files changed, 38 insertions(+), 82 deletions(-) diff --git a/pymc/backends/base.py b/pymc/backends/base.py index 6aefed81b80..06ee2589891 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -562,7 +562,8 @@ def points(self, chains=None): def _squeeze_cat(results, combine: bool, squeeze: bool): """Squeeze and concatenate the results depending on values of - `combine` and `squeeze`.""" + `combine` and `squeeze`. 
+ """ if combine: results = np.concatenate(results) if not squeeze: diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index cf57f9805d7..bffd9620992 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -212,7 +212,7 @@ def _slice_as_ndarray(strace, idx): def point_list_to_multitrace( point_list: list[dict[str, np.ndarray]], model: Model | None = None ) -> MultiTrace: - """transform point list into MultiTrace""" + """Transform point list into MultiTrace""" _model = modelcontext(model) varnames = list(point_list[0].keys()) with _model: diff --git a/pymc/data.py b/pymc/data.py index a0d6893cb19..46a16826d96 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -186,7 +186,6 @@ def Minibatch(variable: TensorVariable, *variables: TensorVariable, batch_size: >>> data2 = np.random.randn(100, 20) >>> mdata1, mdata2 = Minibatch(data1, data2, batch_size=10) """ - if not isinstance(batch_size, int): raise TypeError("batch_size must be an integer") diff --git a/pymc/distributions/custom.py b/pymc/distributions/custom.py index 2e9a6887087..3b1c29e5b1d 100644 --- a/pymc/distributions/custom.py +++ b/pymc/distributions/custom.py @@ -363,7 +363,6 @@ def change_custom_dist_size(op, rv, new_size, expand): @staticmethod def _infer_final_signature(signature: str, n_inputs, n_outputs, n_rngs) -> str: """Add size and updates to user provided gufunc signature if they are missing.""" - # Regex to split across outer commas # Copied from https://stackoverflow.com/a/26634150 outer_commas = re.compile(r",\s*(?![^()]*\))") diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index 32b61e2f6b5..ab510cad30e 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -178,14 +178,16 @@ def log_diff_normal_cdf(mu, sigma, x, y): def sigma2rho(sigma): """ `sigma -> rho` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" + :math:`mu + sigma*e = mu + log(1+exp(rho))*e` + """ return pt.log(pt.exp(pt.abs(sigma)) - 1.0) def rho2sigma(rho): """ `rho -> sigma` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" + :math:`mu + sigma*e = mu + log(1+exp(rho))*e` + """ return pt.softplus(rho) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 1b11f18baef..899a641736b 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -88,7 +88,6 @@ class DistributionMeta(ABCMeta): """ DistributionMeta class - Notes ----- DistributionMeta currently performs many functions, and will likely be refactored soon. @@ -477,7 +476,6 @@ def __new__( rv : TensorVariable The created random variable tensor, registered in the Model. """ - try: from pymc.model import Model diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 5c99bc66730..cf65aaaa6f3 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -353,7 +353,6 @@ def mv_normal_to_precision_mv_normal(fgraph, node): Note: This won't be introduced when calling `pm.logp` as that will dispatch directly without triggering the logprob rewrites. 
""" - rng, size, mu, cov = node.inputs if cov.owner and cov.owner.op == matrix_inverse: tau = cov.owner.inputs[0] @@ -658,7 +657,6 @@ def logp(value, n, p): ------- TensorVariable """ - res = factln(n) + pt.sum(-factln(value) + logpow(p, value), axis=-1) res = pt.switch( pt.or_(pt.any(pt.lt(value, 0), axis=-1), pt.neq(pt.sum(value, axis=-1), n)), @@ -1033,7 +1031,6 @@ def logp(X, nu, V): ------- TensorVariable """ - p = V.shape[0] IVI = det(V) @@ -1101,7 +1098,6 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, initv This distribution is usually a bad idea to use as a prior for multivariate normal. You should instead use LKJCholeskyCov or LKJCorr. """ - L = S if is_cholesky else scipy.linalg.cholesky(S) diag_idx = np.diag_indices_from(S) tril_idx = np.tril_indices_from(S, k=-1) @@ -1621,7 +1617,6 @@ def logp(value, n, eta): ------- TensorVariable """ - if value.ndim > 1: raise NotImplementedError("LKJCorr logp is only implemented for vector values (ndim=1)") @@ -1923,7 +1918,6 @@ def logp(value, mu, rowchol, colchol): ------- TensorVariable """ - if value.ndim != 2: raise ValueError("Value must be two dimensional.") @@ -2296,7 +2290,6 @@ def logp(value, mu, W, alpha, tau, W_is_valid): ------- TensorVariable """ - # If expand_dims were added to (a potentially sparse) W, retrieve the non-expanded W extra_dims = W.type.ndim - 2 if extra_dims: diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 0cedd5da779..0d52cb196c5 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -164,7 +164,6 @@ def shape_from_dims(dims: StrongDims, model) -> StrongShape: dims : tuple of (str or None) Names or None for all RV dimensions. """ - # Dims must be known already unknowndim_dims = set(dims) - set(model.dim_lengths) if unknowndim_dims: @@ -197,7 +196,6 @@ def find_size( size : tuble of int or TensorVariable, optional The size argument for creating the Distribution """ - if size is not None: return size diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index d7f5c665692..d97ce043d39 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -205,7 +205,6 @@ def _slice(self, X, Xs=None): class Combination(Covariance): def __init__(self, factor_list: Sequence): """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" - # Check if all input_dim are the same in factor_list input_dims = {factor.input_dim for factor in factor_list if isinstance(factor, Covariance)} diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 1d767b53e8c..80dd632cac8 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -48,7 +48,6 @@ def _handle_sigma_noise_parameters(sigma, noise): """Helper function for transition of 'noise' parameter to be named 'sigma'.""" - if (sigma is None and noise is None) or (sigma is not None and noise is not None): raise ValueError("'sigma' argument must be specified.") @@ -375,7 +374,6 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): Extra keyword arguments that are passed to :class:`~pymc.MvStudentT` distribution constructor. """ - f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f @@ -418,7 +416,6 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): Extra keyword arguments that are passed to :class:`~pymc.MvStudentT` distribution constructor. 
""" - X = self.X f = self.f nu2, mu, cov = self._build_conditional(Xnew, X, f, jitter) @@ -630,7 +627,6 @@ def conditional( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. """ - givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -845,7 +841,6 @@ def marginal_likelihood( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. """ - self.X = X self.Xu = Xu self.y = y @@ -934,7 +929,6 @@ def conditional( Extra keyword arguments that are passed to :class:`~pymc.MvNormal` distribution constructor. """ - givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index f1adf331ddb..b0b08045900 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -44,7 +44,6 @@ def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: def calc_eigenvalues(L: TensorLike, m: Sequence[int]): """Calculate eigenvalues of the Laplacian.""" - S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))]) S_arr = np.vstack([s.flatten() for s in S]).T diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 2e06f51f522..37bcc92a68e 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -130,7 +130,6 @@ def make_initial_point_fn( return_transformed : bool If `True` the returned variables will correspond to transformed initial values. """ - sdict_overrides = convert_str_to_rv_dict(model, overrides or {}) initval_strats = { **model.rvs_to_initial_values, diff --git a/pymc/logprob/basic.py b/pymc/logprob/basic.py index d8188f2644c..138d24d013e 100644 --- a/pymc/logprob/basic.py +++ b/pymc/logprob/basic.py @@ -87,7 +87,6 @@ def _warn_rvs_in_inferred_graph(graph: TensorVariable | Sequence[TensorVariable] This makes it impossible (or difficult) to replace it by the respective values afterward, so we instruct users to do it beforehand. """ - rvs_in_graph = _find_unallowed_rvs_in_graph(graph) if rvs_in_graph: warnings.warn( @@ -583,7 +582,6 @@ def transformed_conditional_logp( This helper will only return the subset of logprob terms corresponding to `rvs`. All rvs_to_values and rvs_to_transforms mappings are required. 
""" - transform_rewrite = None values_to_transforms = { rvs_to_values[rv]: transform diff --git a/pymc/logprob/checks.py b/pymc/logprob/checks.py index c9c60bb0fba..59cc7298d7b 100644 --- a/pymc/logprob/checks.py +++ b/pymc/logprob/checks.py @@ -62,7 +62,6 @@ def logprob_specify_shape(op, values, inner_rv, *shapes, **kwargs): @node_rewriter([SpecifyShape]) def find_measurable_specify_shapes(fgraph, node) -> list[TensorVariable] | None: r"""Finds `SpecifyShapeOp`\s for which a `logprob` can be computed.""" - if isinstance(node.op, MeasurableSpecifyShape): return None # pragma: no cover @@ -100,7 +99,6 @@ def logprob_check_and_raise(op, values, inner_rv, *assertions, **kwargs): @node_rewriter([CheckAndRaise]) def find_measurable_check_and_raise(fgraph, node) -> list[TensorVariable] | None: r"""Finds `AssertOp`\s for which a `logprob` can be computed.""" - if isinstance(node.op, MeasurableCheckAndRaise): return None # pragma: no cover diff --git a/pymc/logprob/cumsum.py b/pymc/logprob/cumsum.py index af7f73888cf..514500f1b6b 100644 --- a/pymc/logprob/cumsum.py +++ b/pymc/logprob/cumsum.py @@ -77,7 +77,6 @@ def logprob_cumsum(op, values, base_rv, **kwargs): @node_rewriter([CumOp]) def find_measurable_cumsums(fgraph, node) -> list[TensorVariable] | None: r"""Finds `Cumsums`\s for which a `logprob` can be computed.""" - if not (isinstance(node.op, CumOp) and node.op.mode == "add"): return None diff --git a/pymc/logprob/mixture.py b/pymc/logprob/mixture.py index 1fd4b1156ce..55e506ad991 100644 --- a/pymc/logprob/mixture.py +++ b/pymc/logprob/mixture.py @@ -244,7 +244,6 @@ def get_stack_mixture_vars( node: Apply, ) -> tuple[list[TensorVariable] | None, int | None]: r"""Extract the mixture terms from a `*Subtensor*` applied to stacked `MeasurableVariable`\s.""" - assert isinstance(node.op, subtensor_ops) joined_rvs = node.inputs[0] diff --git a/pymc/logprob/rewriting.py b/pymc/logprob/rewriting.py index 08373ed7fcd..f8036d08981 100644 --- a/pymc/logprob/rewriting.py +++ b/pymc/logprob/rewriting.py @@ -106,7 +106,6 @@ def remove_promised_valued_rvs(outputs): @node_rewriter((Elemwise, Alloc, DimShuffle, *subtensor_ops)) def local_lift_DiracDelta(fgraph, node): r"""Lift basic `Op`\s through `DiracDelta`\s.""" - if len(node.outputs) > 1: return @@ -223,7 +222,6 @@ def construct_ir_fgraph( ------- A `FunctionGraph` of the measurable IR. """ - # We add `ShapeFeature` because it will get rid of references to the old # `RandomVariable`s that have been lifted; otherwise, it will be difficult # to give good warnings when an unaccounted for `RandomVariable` is encountered diff --git a/pymc/logprob/scan.py b/pymc/logprob/scan.py index 4b643b7302a..c295bec0536 100644 --- a/pymc/logprob/scan.py +++ b/pymc/logprob/scan.py @@ -100,7 +100,6 @@ def convert_outer_out_to_in( A `ScanArgs` object for a `Scan` in which `outer_out_vars` has been converted to an outer-graph input. 
""" - output_scan_args = copy(input_scan_args) inner_outs_to_new_inner_ins = {} @@ -396,7 +395,6 @@ def create_inner_out_logp(value_map: dict[TensorVariable, TensorVariable]) -> Te @node_rewriter([Scan, Subtensor]) def find_measurable_scans(fgraph, node): r"""Find `Scan`\s for which a `logprob` can be computed.""" - if isinstance(node.op, Subtensor): node = node.inputs[0].owner if not (node and isinstance(node.op, Scan)): diff --git a/pymc/logprob/transform_value.py b/pymc/logprob/transform_value.py index fa013dbf3de..f9c6f720443 100644 --- a/pymc/logprob/transform_value.py +++ b/pymc/logprob/transform_value.py @@ -139,7 +139,6 @@ def transform_values(fgraph: FunctionGraph, node: Apply) -> list[Apply] | None: variable is specified on the log scale and back-transform it to obtain ``Y`` on the natural scale. """ - values_to_transforms: TransformValuesMapping | None = getattr( fgraph, "values_to_transforms", None ) @@ -220,7 +219,6 @@ def __init__( not be transformed. """ - self.values_to_transforms = values_to_transforms def add_requirements(self, fgraph): diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index d6dd0894b19..dee5e907bfb 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -137,7 +137,8 @@ def backward( self, value: TensorVariable, *inputs: Variable ) -> TensorVariable | tuple[TensorVariable, ...]: """Invert the transformation. Multiple values may be returned when the - transformation is not 1-to-1""" + transformation is not 1-to-1 + """ def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: """Construct the log of the absolute value of the Jacobian determinant.""" @@ -452,7 +453,6 @@ def measurable_power_exponent_to_exp(fgraph, node): ) def find_measurable_transforms(fgraph: FunctionGraph, node: Node) -> list[Node] | None: """Find measurable transformations from Elemwise operators.""" - # Node was already converted if isinstance(node.op, MeasurableOp): return None diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index adc75b556ae..10aa3c8bb96 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -80,7 +80,6 @@ def replace_rvs_by_values( rvs_to_transforms, optional Mapping between the original graph RVs and respective value transforms """ - if rvs_to_transforms: # Conditional transforms like Interval can reference variables in the original RV graph # To avoid mutating the original graphs in place, we have to clone them @@ -302,7 +301,6 @@ def diracdelta_logprob(op, values, *inputs, **kwargs): def find_negated_var(var): """Return a variable that is being multiplied by -1 or None otherwise.""" - if not ( var.owner and isinstance(var.owner.op, Elemwise) and isinstance(var.owner.op.scalar_op, Mul) ): diff --git a/pymc/model/core.py b/pymc/model/core.py index 41d4e0864a9..38ee3de4861 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -130,7 +130,8 @@ def __init__(cls, name, bases, nmspc, context_class: type | None = None, **kwarg def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | None: """Return the most recently pushed context object of type ``cls`` on the stack, or ``None``. If ``error_if_none`` is True (default), - raise a ``TypeError`` instead of returning ``None``.""" + raise a ``TypeError`` instead of returning ``None``. 
+ """ try: candidate: T | None = cls.get_contexts()[-1] except IndexError: @@ -145,7 +146,8 @@ def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | def get_contexts(cls) -> list[T]: """Return a stack of context instances for the ``context_class`` - of ``cls``.""" + of ``cls``. + """ # This lazily creates the context class's contexts # thread-local object, as needed. This seems inelegant to me, # but since the context class is not guaranteed to exist when @@ -852,19 +854,22 @@ def d2logp( @property def datalogp(self) -> Variable: """PyTensor scalar of log-probability of the observed variables and - potential terms""" + potential terms + """ return self.observedlogp + self.potentiallogp @property def varlogp(self) -> Variable: """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic).""" + (excluding deterministic). + """ return self.logp(vars=self.free_RVs) @property def varlogp_nojac(self) -> Variable: """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic) without jacobian term.""" + (excluding deterministic) without jacobian term. + """ return self.logp(vars=self.free_RVs, jacobian=False) @property @@ -2204,7 +2209,6 @@ def compile_fn( ------- Compiled PyTensor function """ - model = modelcontext(model) return model.compile_fn( outs, diff --git a/pymc/model/fgraph.py b/pymc/model/fgraph.py index 8c37861c8f3..6f1a1846cd4 100644 --- a/pymc/model/fgraph.py +++ b/pymc/model/fgraph.py @@ -151,7 +151,6 @@ def fgraph_from_model( memo: Dict A dictionary mapping original model variables to the equivalent nodes in the fgraph. """ - if any(v is not None for v in model.rvs_to_initial_values.values()): raise NotImplementedError("Cannot convert models with non-default initial_values") diff --git a/pymc/model/transform/basic.py b/pymc/model/transform/basic.py index 76556ae08ab..3d756785a5d 100644 --- a/pymc/model/transform/basic.py +++ b/pymc/model/transform/basic.py @@ -29,7 +29,6 @@ def prune_vars_detached_from_observed(model: Model) -> Model: """Prune model variables that are not related to any observed variable in the Model.""" - # Potentials are ambiguous as whether they correspond to likelihood or prior terms, # We simply raise for now if model.potentials: diff --git a/pymc/ode/utils.py b/pymc/ode/utils.py index 1ccf7e5ba39..5ccd596b18c 100644 --- a/pymc/ode/utils.py +++ b/pymc/ode/utils.py @@ -44,7 +44,6 @@ def make_sens_ic(n_states, n_theta, floatX): dydp : array 1D-array of shape (n_states * (n_states + n_theta),), representing the initial condition of the sensitivities """ - # Initialize the sensitivity matrix to be 0 everywhere sens_matrix = np.zeros((n_states, n_states + n_theta), dtype=floatX) @@ -81,7 +80,6 @@ def augment_system(ode_func, n_states, n_theta): system: function Augemted system of differential equations. """ - # Present state of the system t_y = pt.vector("y", dtype="float64") t_y.tag.test_value = np.ones((n_states,), dtype="float64") diff --git a/pymc/printing.py b/pymc/printing.py index ef417f37993..b53049950b7 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -38,8 +38,8 @@ def str_for_dist( dist: TensorVariable, formatting: str = "plain", include_params: bool = True ) -> str: """Make a human-readable string representation of a Distribution in a model, either - LaTeX or plain, optionally with distribution parameter values included.""" - + LaTeX or plain, optionally with distribution parameter values included. 
+ """ if include_params: if isinstance(dist.owner.op, RandomVariable) or getattr( dist.owner.op, "extended_signature", None @@ -99,8 +99,8 @@ def str_for_dist( def str_for_model(model: Model, formatting: str = "plain", include_params: bool = True) -> str: """Make a human-readable string representation of Model, listing all random variables - and their distributions, optionally including parameter values.""" - + and their distributions, optionally including parameter values. + """ # Wrap functions to avoid confusing typecheckers sfd = partial(str_for_dist, formatting=formatting, include_params=include_params) sfp = partial( @@ -147,7 +147,8 @@ def str_for_potential_or_deterministic( dist_name: str = "Deterministic", ) -> str: """Make a human-readable string representation of a Deterministic or Potential in a model, either - LaTeX or plain, optionally with distribution parameter values included.""" + LaTeX or plain, optionally with distribution parameter values included. + """ print_name = var.name if var.name is not None else "" if "latex" in formatting: print_name = r"\text{" + _latex_escape(print_name.strip("$")) + "}" diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index f180f279401..66ca9bf7c43 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -325,7 +325,7 @@ def smarttypeX(x): def gradient1(f, v): - """flat gradient of f wrt v""" + """Flat gradient of f wrt v""" return pt.flatten(grad(f, v, disconnected_inputs="warn")) @@ -343,7 +343,7 @@ def gradient(f, vars=None): def jacobian1(f, v): - """jacobian of f wrt v""" + """Jacobian of f wrt v""" f = pt.flatten(f) idx = pt.arange(f.shape[0], dtype="int32") diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index c1504091e51..5d673030375 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -754,7 +754,6 @@ def sample_posterior_predictive( """ - _trace: MultiTrace | PointList nchain: int if idata_kwargs is None: diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index c530af8d9a3..809a223c069 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -99,7 +99,6 @@ def _replace_shared_variables(graph: list[TensorVariable]) -> list[TensorVariabl ValueError If any shared variable contains default_updates """ - shared_variables = [var for var in graph_inputs(graph) if isinstance(var, SharedVariable)] if any(isinstance(var.type, RandomType) for var in shared_variables): @@ -124,7 +123,6 @@ def get_jaxified_graph( outputs: list[TensorVariable] | None = None, ) -> list[TensorVariable]: """Compile an PyTensor graph into an optimized JAX function""" - graph = _replace_shared_variables(outputs) if outputs is not None else None fgraph = FunctionGraph(inputs=inputs, outputs=graph, clone=True) @@ -224,7 +222,6 @@ def _get_batched_jittered_initial_points( list with one item per variable and number of chains as batch dimension. Each item has shape `(chains, *var.shape)` """ - initial_points = _init_jitter( model, initvals, @@ -354,7 +351,6 @@ def _sample_blackjax_nuts( with their respective sample stats and pointwise log likeihood values (unless skipped with ``idata_kwargs``). 
""" - import blackjax # Adapted from numpyro diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index 228850e63e5..5e7c9344545 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -909,7 +909,8 @@ def _sample_return( model: Model, ) -> InferenceData | MultiTrace: """Final step of `pm.sampler` that picks/slices chains, - runs diagnostics and converts to the desired return type.""" + runs diagnostics and converts to the desired return type. + """ # Pick and slice chains to keep the maximum number of samples if discard_tuned_samples: traces, length = _choose_chains(traces, tune) @@ -1312,7 +1313,6 @@ def _init_jitter( start : ``pymc.model.Point`` Starting point for sampler """ - ipfns = make_initial_point_fns_per_chain( model=model, overrides=initvals, diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 2bfde7583e9..e55c6a50f27 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -161,7 +161,6 @@ def __init__( Dictionary that contains information about model variables shape and size. """ - self.draws = draws self.start = start if threshold < 0 or threshold > 1: @@ -614,7 +613,6 @@ def _logp_forw(point, out_vars, in_vars, shared): shared : list Containing TensorVariable for depended shared data """ - # Replace integer inputs with rounded float inputs if any(var.dtype in discrete_types for var in in_vars): replace_int_input = {} diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 4cd3c11771a..155d5316475 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -150,7 +150,6 @@ def sample_smc( `link `__ """ - if isinstance(kernel, str) and kernel.lower() in ("abc", "metropolis"): warnings.warn( f'The kernel string argument "{kernel}" in sample_smc has been deprecated. ' diff --git a/pymc/stats/log_density.py b/pymc/stats/log_density.py index 3216e26f3ee..4feea19ca13 100644 --- a/pymc/stats/log_density.py +++ b/pymc/stats/log_density.py @@ -154,7 +154,6 @@ def compute_log_density( InferenceData with the ``log_likelihood`` group when ``kind == "likelihood"`` or the ``log_prior`` group when ``kind == "prior"``. """ - posterior = idata["posterior"] model = modelcontext(model) diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py index 87dd30420a8..1d87297efd8 100644 --- a/pymc/step_methods/compound.py +++ b/pymc/step_methods/compound.py @@ -234,7 +234,8 @@ def __init__(self, methods: list[StepMethodState]): class CompoundStep(WithSamplingState): """Step method composed of a list of several other step - methods applied in sequence.""" + methods applied in sequence. + """ _state_class = CompoundStepState diff --git a/pymc/step_methods/hmc/nuts.py b/pymc/step_methods/hmc/nuts.py index 9bcde951041..fb816954b6a 100644 --- a/pymc/step_methods/hmc/nuts.py +++ b/pymc/step_methods/hmc/nuts.py @@ -225,7 +225,6 @@ def _hamiltonian_step(self, start, p0, step_size): @staticmethod def competence(var, has_grad): """Check how appropriate this class is for sampling a random variable.""" - if var.dtype in continuous_types and has_grad: return Competence.PREFERRED return Competence.INCOMPATIBLE diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index 21fb6c83e83..ea5db3fbb91 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -187,7 +187,6 @@ def __init__( :py:class:`~numpy.random.Generator` object. Refer to :py:func:`pymc.util.get_random_generator` for more information. 
""" - model = pm.modelcontext(model) initial_values = model.initial_point() diff --git a/pymc/testing.py b/pymc/testing.py index 7a43c637662..e9ded1824ea 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -68,7 +68,7 @@ def product(domains, n_samples=-1): must be "domain-like", as in, have a `.vals` property n_samples: int, maximum samples to return. -1 to return whole product - Returns: + Returns ------- list of the cartesian product of the domains """ @@ -979,7 +979,6 @@ def seeded_numpy_distribution_builder(dist_name: str) -> Callable: def assert_no_rvs(vars: Sequence[Variable]) -> None: """Assert that there are no `MeasurableOp` nodes in a graph.""" - rvs = rvs_in_graph(vars) if rvs: raise AssertionError(f"RV found in graph: {rvs}") diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index 08d267adb5f..df3cf071c21 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -35,7 +35,6 @@ def fixed_hessian(point, model=None): vars: list Variables for which Hessian is to be calculated. """ - model = modelcontext(model) point = Point(point, model=model) diff --git a/pymc/variational/minibatch_rv.py b/pymc/variational/minibatch_rv.py index be71a358c96..21c16ed2a1c 100644 --- a/pymc/variational/minibatch_rv.py +++ b/pymc/variational/minibatch_rv.py @@ -82,7 +82,6 @@ def create_minibatch_rv( def get_scaling(total_size: Sequence[Variable], shape: TensorVariable) -> TensorVariable: """Gets scaling constant for logp.""" - # mypy doesn't understand we can convert a shape TensorVariable into a tuple shape = tuple(shape) # type: ignore diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 898b6afc54c..a66c87f7d5f 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -143,7 +143,6 @@ def inner(*args, **kwargs): def node_property(f): """A shortcut for wrapping method to accessible tensor""" - if isinstance(f, str): def wrapper(fn): @@ -1011,7 +1010,6 @@ def set_size_and_deterministic( ------- :class:`Variable` or list with applied replacements, ready to use """ - flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) node_out = graph_replace(node, flat2rand, strict=False) assert not ( @@ -1327,19 +1325,22 @@ def _single_symbolic_varlogp_and_datalogp(self): @node_property def single_symbolic_varlogp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` - is not needed and code can be optimized""" + is not needed and code can be optimized + """ return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` - is not needed and code can be optimized""" + is not needed and code can be optimized + """ return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` - is not needed and code can be optimized""" + is not needed and code can be optimized + """ return self.single_symbolic_datalogp + self.single_symbolic_varlogp @node_property From cbba642fa82da7d62b2f1b845ab5ae54c2373438 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Mon, 7 Oct 2024 18:00:36 +0200 Subject: [PATCH 2/9] Auto-fix D200, D301, D400 --- benchmarks/benchmarks/benchmarks.py | 8 +- pymc/backends/__init__.py | 2 +- pymc/backends/base.py | 6 +- pymc/backends/ndarray.py | 6 +- pymc/backends/report.py | 2 +- pymc/blocking.py | 10 +- pymc/data.py | 4 +- 
pymc/distributions/censored.py | 4 +- pymc/distributions/continuous.py | 20 ++-- pymc/distributions/custom.py | 8 +- pymc/distributions/discrete.py | 2 +- pymc/distributions/dist_math.py | 42 +++----- pymc/distributions/distribution.py | 22 ++-- pymc/distributions/mixture.py | 10 +- pymc/distributions/multivariate.py | 13 +-- pymc/distributions/shape_utils.py | 6 +- pymc/distributions/simulator.py | 2 +- pymc/distributions/timeseries.py | 14 +-- pymc/distributions/transforms.py | 4 +- pymc/distributions/truncated.py | 2 +- pymc/exceptions.py | 4 +- pymc/gp/cov.py | 18 ++-- pymc/gp/gp.py | 4 +- pymc/gp/mean.py | 13 +-- pymc/gp/util.py | 2 +- pymc/initial_point.py | 4 +- pymc/logprob/abstract.py | 4 +- pymc/logprob/censoring.py | 4 +- pymc/logprob/order.py | 2 +- pymc/logprob/transforms.py | 6 +- pymc/logprob/utils.py | 4 +- pymc/math.py | 15 ++- pymc/model/core.py | 26 ++--- pymc/model/transform/conditioning.py | 4 +- pymc/model_graph.py | 8 +- pymc/ode/ode.py | 2 +- pymc/ode/utils.py | 2 +- pymc/printing.py | 4 +- pymc/pytensorf.py | 30 +++--- pymc/sampling/forward.py | 6 +- pymc/sampling/jax.py | 8 +- pymc/sampling/mcmc.py | 4 +- pymc/smc/kernels.py | 26 ++--- pymc/stats/log_density.py | 6 +- pymc/step_methods/compound.py | 3 +- pymc/step_methods/hmc/quadpotential.py | 2 +- pymc/step_methods/metropolis.py | 10 +- pymc/testing.py | 10 +- pymc/tuning/scaling.py | 2 +- pymc/tuning/starting.py | 2 +- pymc/util.py | 16 +-- pymc/variational/approximations.py | 6 +- pymc/variational/callbacks.py | 4 +- pymc/variational/inference.py | 22 ++-- pymc/variational/minibatch_rv.py | 2 +- pymc/variational/operators.py | 6 +- pymc/variational/opvi.py | 128 ++++++++++++------------ pymc/variational/test_functions.py | 4 +- pymc/variational/updates.py | 28 +++--- scripts/generate_pip_deps_from_conda.py | 2 +- setupegg.py | 4 +- 61 files changed, 299 insertions(+), 345 deletions(-) diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 0cf4d5e3669..f9913d8f7a3 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -24,7 +24,7 @@ def glm_hierarchical_model(random_seed=123): - """Sample glm hierarchical model to use in benchmarks""" + """Sample glm hierarchical model to use in benchmarks.""" np.random.seed(random_seed) data = pd.read_csv(pm.get_data("radon.csv")) data["log_radon"] = data["log_radon"].astype(pytensor.config.floatX) @@ -47,7 +47,7 @@ def glm_hierarchical_model(random_seed=123): def mixture_model(random_seed=1234): - """Sample mixture model to use in benchmarks""" + """Sample mixture model to use in benchmarks.""" np.random.seed(1234) size = 1000 w_true = np.array([0.35, 0.4, 0.25]) @@ -79,7 +79,7 @@ def mixture_model(random_seed=1234): class OverheadSuite: """ Just tests how long sampling from a normal distribution takes for various - samplers + samplers. """ params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice] @@ -161,7 +161,7 @@ def time_glm_hierarchical(self): class NUTSInitSuite: - """Tests initializations for NUTS sampler on models""" + """Tests initializations for NUTS sampler on models.""" timeout = 360.0 params = ("adapt_diag", "jitter+adapt_diag", "jitter+adapt_full", "adapt_full") diff --git a/pymc/backends/__init__.py b/pymc/backends/__init__.py index 2f58b7ed8aa..8ef9ed3e8a5 100644 --- a/pymc/backends/__init__.py +++ b/pymc/backends/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Storage backends for traces +"""Storage backends for traces. The NDArray (pymc.backends.NDArray) backend holds the entire trace in memory. diff --git a/pymc/backends/base.py b/pymc/backends/base.py index 06ee2589891..09fc59f7850 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Base backend for traces +"""Base backend for traces. See the docstring for pymc.backends for more information """ @@ -127,7 +127,7 @@ def close(self): class BaseTrace(IBaseTrace): - """Base trace object + """Base trace object. Parameters ---------- @@ -546,7 +546,7 @@ def point(self, idx: int, chain: int | None = None) -> dict[str, np.ndarray]: return self._straces[chain].point(idx) def points(self, chains=None): - """Return an iterator over all or some of the sample points + """Return an iterator over all or some of the sample points. Parameters ---------- diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index bffd9620992..7997bf1e97f 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""NumPy array trace backend +"""NumPy array trace backend. Store sampling values in memory as a NumPy array. """ @@ -27,7 +27,7 @@ class NDArray(base.BaseTrace): - """NDArray trace object + """NDArray trace object. Parameters ---------- @@ -212,7 +212,7 @@ def _slice_as_ndarray(strace, idx): def point_list_to_multitrace( point_list: list[dict[str, np.ndarray]], model: Model | None = None ) -> MultiTrace: - """Transform point list into MultiTrace""" + """Transform point list into MultiTrace.""" _model = modelcontext(model) varnames = list(point_list[0].keys()) with _model: diff --git a/pymc/backends/report.py b/pymc/backends/report.py index 49e584a9791..9a630ee242f 100644 --- a/pymc/backends/report.py +++ b/pymc/backends/report.py @@ -43,7 +43,7 @@ def ok(self): @property def n_tune(self) -> int | None: - """Number of tune iterations - not necessarily kept in trace!""" + """Number of tune iterations - not necessarily kept in trace.""" return self._n_tune @property diff --git a/pymc/blocking.py b/pymc/blocking.py index 287a06d530e..dcbfe0ead36 100644 --- a/pymc/blocking.py +++ b/pymc/blocking.py @@ -12,11 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -pymc.blocking - -Classes for working with subsets of parameters. -""" +"""Classes for working with subsets of parameters.""" from __future__ import annotations @@ -51,9 +47,7 @@ class RaveledVars(NamedTuple): class Compose(Generic[T]): - """ - Compose two functions in a pickleable way - """ + """Compose two functions in a pickleable way.""" def __init__(self, fa: Callable[[PointType], T], fb: Callable[[RaveledVars], PointType]): self.fa = fa diff --git a/pymc/data.py b/pymc/data.py index 46a16826d96..0e92c9e809e 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -89,7 +89,7 @@ def clone(self): class GeneratorAdapter: """ Helper class that helps to infer data type of generator with looking - at the first item, preserving the order of the resulting generator + at the first item, preserving the order of the resulting generator. 
""" def make_variable(self, gop, name=None): @@ -135,7 +135,7 @@ class MinibatchIndexRV(IntegersRV): class MinibatchOp(OpFromGraph): - """Encapsulate Minibatch random draws in an opaque OFG""" + """Encapsulate Minibatch random draws in an opaque OFG.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs, inline=True) diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index ed11c633a59..4be21b1c9d9 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -33,7 +33,7 @@ class CensoredRV(SymbolicRandomVariable): - """Censored random variable""" + """Censored random variable.""" inline_logprob = True extended_signature = "(),(),()->()" @@ -61,7 +61,7 @@ def rv_op(cls, dist, lower, upper, *, size=None): class Censored(Distribution): r""" - Censored distribution + Censored distribution. The pdf of a censored distribution is diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 6e68d98bf84..8b15f1aa60b 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -137,19 +137,19 @@ def polyagamma_cdf(*args, **kwargs): class PositiveContinuous(Continuous): - """Base class for positive continuous distributions""" + """Base class for positive continuous distributions.""" class UnitContinuous(Continuous): - """Base class for continuous distributions on [0,1]""" + """Base class for continuous distributions on [0,1].""" class CircularContinuous(Continuous): - """Base class for circular continuous distributions""" + """Base class for circular continuous distributions.""" class BoundedContinuous(Continuous): - """Base class for bounded continuous distributions""" + """Base class for bounded continuous distributions.""" # Indices of the arguments that define the lower and upper bounds of the distribution bound_args_indices: list[int] | None = None @@ -216,8 +216,10 @@ def assert_negative_support(var, label, distname, value=-1e-6): def get_tau_sigma(tau=None, sigma=None): r""" - Find precision and standard deviation. The link between the two - parameterizations is given by the inverse relationship: + Find precision and standard deviation. + + The link between the two parameterizations is given by the inverse + relationship: .. math:: \tau = \frac{1}{\sigma^2} @@ -3837,9 +3839,7 @@ def dist(cls, x_points, pdf_points, *args, **kwargs): return super().dist([x_points, pdf_points, cdf_points], **kwargs) def support_point(rv, size, x_points, pdf_points, cdf_points): - """ - Estimates the expectation integral using the trapezoid rule; cdf_points are not used. - """ + """Estimates the expectation integral using the trapezoid rule; cdf_points are not used.""" x_fx = pt.mul(x_points, pdf_points) # x_i * f(x_i) for all xi's in x_points support_point = ( pt.sum(pt.mul(pt.diff(x_points, axis=-1), x_fx[..., 1:] + x_fx[..., :-1])) / 2 @@ -3993,7 +3993,7 @@ def __call__(self, h=1.0, z=0.0, size=None, **kwargs): @classmethod def rng_fn(cls, rng, h, z, size=None) -> np.ndarray: """ - Generate a random sample from the distribution with the given parameters + Generate a random sample from the distribution with the given parameters. 
Parameters ---------- diff --git a/pymc/distributions/custom.py b/pymc/distributions/custom.py index 3b1c29e5b1d..1bec80f4880 100644 --- a/pymc/distributions/custom.py +++ b/pymc/distributions/custom.py @@ -73,7 +73,7 @@ def default_support_point(rv, size, *rv_inputs, rv_name=None, has_fallback=False class CustomDistRV(RandomVariable): """ - Base class for CustomDistRV + Base class for CustomDistRV. This should be subclassed when defining CustomDist objects. """ @@ -89,7 +89,7 @@ def rng_fn(cls, rng, *args): class _CustomDist(Distribution): - """A distribution that returns a subclass of CustomDistRV""" + """A distribution that returns a subclass of CustomDistRV.""" rv_type = CustomDistRV @@ -194,7 +194,7 @@ def custom_dist_support_point(op, rv, rng, size, *dist_params): class CustomSymbolicDistRV(SymbolicRandomVariable): """ - Base class for CustomSymbolicDist + Base class for CustomSymbolicDist. This should be subclassed when defining custom CustomDist objects that have symbolic random methods. @@ -459,7 +459,7 @@ def dist_support_point(op, rv, *args): class CustomDist: - """A helper class to create custom distributions + """A helper class to create custom distributions. This class can be used to wrap black-box random and logp methods for use in forward and mcmc sampling. diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 93f318feb23..179bae25f55 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -293,7 +293,7 @@ def logcdf(value, n, alpha, beta): class Bernoulli(Discrete): - R"""Bernoulli log-likelihood + R"""Bernoulli log-likelihood. The Bernoulli distribution describes the probability of successes (x=1) and failures (x=0). diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index ab510cad30e..b730f39bf05 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 7, 2011 +Created on Mar 7, 2011. @author: johnsalvatier """ @@ -90,9 +90,7 @@ def check_icdf_value(expr: Variable, value: Variable) -> Variable: def logpow(x, m): - """ - Calculates log(x**m) since m*log(x) will fail when m, x = 0. - """ + """Calculates log(x**m) since m*log(x) will fail when m, x = 0.""" # return m * log(x) return pt.switch(pt.eq(x, 0), pt.switch(pt.eq(m, 0), 0.0, -np.inf), m * pt.log(x)) @@ -110,9 +108,7 @@ def betaln(x, y): def std_cdf(x): - """ - Calculates the standard normal cumulative distribution function. - """ + """Calculates the standard normal cumulative distribution function.""" return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2.0)) @@ -136,7 +132,7 @@ def normal_lccdf(mu, sigma, x): def log_diff_normal_cdf(mu, sigma, x, y): - """ + r""" Compute :math:`\\log(\\Phi(\frac{x - \\mu}{\\sigma}) - \\Phi(\frac{y - \\mu}{\\sigma}))` safely in log space. Parameters @@ -178,7 +174,7 @@ def log_diff_normal_cdf(mu, sigma, x, y): def sigma2rho(sigma): """ `sigma -> rho` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e` + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. """ return pt.log(pt.exp(pt.abs(sigma)) - 1.0) @@ -186,7 +182,7 @@ def sigma2rho(sigma): def rho2sigma(rho): """ `rho -> sigma` PyTensor converter - :math:`mu + sigma*e = mu + log(1+exp(rho))*e` + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. 
""" return pt.softplus(rho) @@ -198,7 +194,7 @@ def rho2sigma(rho): def log_normal(x, mean, **kwargs): """ Calculate logarithm of normal distribution at point `x` - with given `mean` and `std` + with given `mean` and `std`. Parameters ---------- @@ -241,9 +237,7 @@ def log_normal(x, mean, **kwargs): class SplineWrapper(Op): - """ - Creates an PyTensor operation from scipy.interpolate.UnivariateSpline - """ + """Creates an PyTensor operation from scipy.interpolate.UnivariateSpline.""" __props__ = ("spline",) @@ -278,9 +272,7 @@ def grad(self, inputs, grads): class I1e(UnaryScalarOp): - """ - Modified Bessel function of the first kind of order 1, exponentially scaled. - """ + """Modified Bessel function of the first kind of order 1, exponentially scaled.""" nfunc_spec = ("scipy.special.i1e", 1, 1) @@ -293,9 +285,7 @@ def impl(self, x): class I0e(UnaryScalarOp): - """ - Modified Bessel function of the first kind of order 0, exponentially scaled. - """ + """Modified Bessel function of the first kind of order 0, exponentially scaled.""" nfunc_spec = ("scipy.special.i0e", 1, 1) @@ -313,7 +303,7 @@ def grad(self, inp, grads): def random_choice(p, size): - """Return draws from categorical probability functions + """Return draws from categorical probability functions. Parameters ---------- @@ -352,9 +342,7 @@ def random_choice(p, size): def zvalue(value, sigma, mu): - """ - Calculate the z-value for a normal distribution. - """ + """Calculate the z-value for a normal distribution.""" return (value - mu) / sigma @@ -399,7 +387,7 @@ def clipped_beta_rvs(a, b, size=None, random_state=None, dtype="float64"): def multigammaln(a, p): - """Multivariate Log Gamma + """Multivariate Log Gamma. Parameters ---------- @@ -412,9 +400,7 @@ def multigammaln(a, p): def log_i0(x): - """ - Calculates the logarithm of the 0 order modified Bessel function of the first kind"" - """ + """Calculates the logarithm of the 0 order modified Bessel function of the first kind.""" return pt.switch( pt.lt(x, 5), pt.log1p( diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 899a641736b..73532f4faec 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -86,7 +86,7 @@ class _Unpickling: class DistributionMeta(ABCMeta): """ - DistributionMeta class + DistributionMeta class. Notes ----- @@ -208,7 +208,7 @@ def __get__(self, instance, type_): class SymbolicRandomVariable(MeasurableOp, OpFromGraph): - """Symbolic Random Variable + """Symbolic Random Variable. This is a subclasse of `OpFromGraph` which is used to encapsulate the symbolic random graph of complex distributions which are built on top of pure @@ -269,7 +269,7 @@ def ndims_params(cls_or_self) -> Sequence[int] | None: @_class_or_instancemethod @property def ndim_supp(cls_or_self) -> int | None: - """Number of support dimensions of the RandomVariable + """Number of support dimensions of the RandomVariable. (0 for scalar, 1 for vector, ...) 
""" @@ -308,7 +308,7 @@ def default_output(cls_or_self) -> int | None: def get_input_output_type_idxs( extended_signature: str | None, ) -> tuple[tuple[tuple[int], int | None, tuple[int]], tuple[tuple[int], tuple[int]]]: - """Parse extended_signature and return indexes for *[rng], [size] and parameters as well as outputs""" + """Parse extended_signature and return indexes for *[rng], [size] and parameters as well as outputs.""" if extended_signature is None: raise ValueError("extended_signature must be provided") @@ -340,17 +340,17 @@ def get_input_output_type_idxs( ) def rng_params(self, node) -> tuple[Variable, ...]: - """Extract the rng parameters from the node's inputs""" + """Extract the rng parameters from the node's inputs.""" [rng_args_idxs, _, _], _ = self.get_input_output_type_idxs(self.extended_signature) return tuple(node.inputs[i] for i in rng_args_idxs) def size_param(self, node) -> Variable | None: - """Extract the size parameter from the node's inputs""" + """Extract the size parameter from the node's inputs.""" [_, size_arg_idx, _], _ = self.get_input_output_type_idxs(self.extended_signature) return node.inputs[size_arg_idx] if size_arg_idx is not None else None def dist_params(self, node) -> tuple[Variable, ...]: - """Extract distribution parameters from the node's inputs""" + """Extract distribution parameters from the node's inputs.""" [_, _, param_args_idxs], _ = self.get_input_output_type_idxs(self.extended_signature) return tuple(node.inputs[i] for i in param_args_idxs) @@ -383,7 +383,7 @@ def __init__( super().__init__(*args, **kwargs) def update(self, node: Apply) -> dict[Variable, Variable]: - """Symbolic update expression for input random state variables + """Symbolic update expression for input random state variables. Returns a dictionary with the symbolic expressions required for correct updating of random state input variables repeated function evaluations. This is used by @@ -420,7 +420,7 @@ def change_symbolic_rv_size(op: SymbolicRandomVariable, rv, new_size, expand) -> class Distribution(metaclass=DistributionMeta): - """Statistical distribution""" + """Statistical distribution.""" rv_op: [RandomVariable, SymbolicRandomVariable] = None rv_type: MetaType = None @@ -630,7 +630,7 @@ def moment(rv: TensorVariable) -> TensorVariable: class Discrete(Distribution): - """Base class for discrete distributions""" + """Base class for discrete distributions.""" def __new__(cls, name, *args, **kwargs): if kwargs.get("transform", None): @@ -640,7 +640,7 @@ def __new__(cls, name, *args, **kwargs): class Continuous(Distribution): - """Base class for continuous distributions""" + """Base class for continuous distributions.""" class DiracDeltaRV(SymbolicRandomVariable): diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 36cd1c397b5..cc819e87e21 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -163,7 +163,7 @@ def update(self, node: Apply): class Mixture(Distribution): R""" - Mixture log-likelihood + Mixture log-likelihood. Often used to model subpopulation heterogeneity @@ -493,7 +493,7 @@ def mixture_args_fn(rng, weights, *components): class NormalMixture: R""" - Normal mixture log-likelihood + Normal mixture log-likelihood. .. math:: @@ -555,7 +555,7 @@ def dist(cls, w, mu, sigma=None, tau=None, **kwargs): def _zero_inflated_mixture(*, name, nonzero_p, nonzero_dist, **kwargs): - """Helper function to create a zero-inflated mixture + """Helper function to create a zero-inflated mixture. 
If name is `None`, this function returns an unregistered variable """ @@ -705,7 +705,7 @@ class ZeroInflatedNegativeBinomial: The Zero-inflated version of the Negative Binomial (NB). The NB distribution describes a Poisson random variable whose rate parameter is gamma distributed. - The pmf of this distribution is + The pmf of this distribution is. .. math:: @@ -799,7 +799,7 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs): def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, max_n_steps=10_000, **kwargs): - """Helper function to create a hurdle mixtures + """Helper function to create a hurdle mixtures. If name is `None`, this function returns an unregistered variable diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index cf65aaaa6f3..4438c6ff550 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -118,7 +118,7 @@ def _squeeze_to_ndim(var: TensorVariable | np.ndarray, ndim: int): class SimplexContinuous(Continuous): - """Base class for simplex continuous distributions""" + """Base class for simplex continuous distributions.""" @_default_transform.register(SimplexContinuous) @@ -345,7 +345,7 @@ def precision_mv_normal_logp(op: PrecisionMvNormalRV, value, rng, size, mean, ta @node_rewriter(tracks=[MvNormalRV]) def mv_normal_to_precision_mv_normal(fgraph, node): - """Replaces MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau) + """Replaces MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau). This is introduced in logprob rewrites to provide a more efficient logp for a MvNormal that is defined by a precision matrix. @@ -898,10 +898,7 @@ def posdef(AA): class PosDefMatrix(Op): - """ - Check if input is positive definite. Input should be a square matrix. - - """ + """Check if input is positive definite. Input should be a square matrix.""" # Properties attribute __props__ = () @@ -2163,7 +2160,7 @@ def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size) Implementation of algorithm from paper Havard Rue, 2001. "Fast sampling of Gaussian Markov random fields," Journal of the Royal Statistical Society Series B, Royal Statistical Society, - vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288 + vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288. """ if not W_is_valid.all(): raise ValueError("W must be a valid adjacency matrix") @@ -2658,7 +2655,7 @@ def logp(value, alpha, K): class ZeroSumNormalRV(SymbolicRandomVariable): - """ZeroSumNormal random variable""" + """ZeroSumNormal random variable.""" _print_name = ("ZeroSumNormal", "\\operatorname{ZeroSumNormal}") diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 0d52cb196c5..9f3219a2f1c 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -51,7 +51,7 @@ def to_tuple(shape): - """Convert ints, arrays, and Nones to tuples + """Convert ints, arrays, and Nones to tuples. 
Parameters ---------- @@ -208,7 +208,7 @@ def find_size( def rv_size_is_none(size: TensorVariable | Constant | None) -> bool: - """Check whether an rv size is None (i.e., NoneConst)""" + """Check whether an rv size is None (i.e., NoneConst).""" if size is None: return True return isinstance(size.type, NoneTypeT) @@ -341,7 +341,7 @@ def get_support_shape( support_shape_offset: Sequence[int] | None = None, ndim_supp: int = 1, ) -> TensorVariable | None: - """Extract the support shapes from shape / dims / observed information + """Extract the support shapes from shape / dims / observed information. Parameters ---------- diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index dc7700f7d57..a9531a88c22 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -34,7 +34,7 @@ class SimulatorRV(RandomVariable): """ - Base class for SimulatorRVs + Base class for SimulatorRVs. This should be subclassed when defining custom Simulator objects. """ diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 80b13f56d91..6469cd101b6 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -60,7 +60,7 @@ class RandomWalkRV(SymbolicRandomVariable): - """RandomWalk Variable""" + """RandomWalk Variable.""" _print_name = ("RandomWalk", "\\operatorname{RandomWalk}") @@ -121,7 +121,7 @@ def rv_op(cls, init_dist, innovation_dist, steps, size=None): class RandomWalk(Distribution): - r"""RandomWalk Distribution + r"""RandomWalk Distribution. TODO: Expand docstrings """ @@ -247,7 +247,7 @@ def random_walk_logp(op, values, *inputs, **kwargs): class PredefinedRandomWalk(ABCMeta): - """Base class for predefined RandomWalk distributions""" + """Base class for predefined RandomWalk distributions.""" def __new__(cls, name, *args, **kwargs): init_dist, innovation_dist, kwargs = cls.get_dists(*args, **kwargs) @@ -309,7 +309,7 @@ def get_dists(cls, mu=0.0, sigma=1.0, *, init_dist=None, **kwargs): class MvGaussianRandomWalk(PredefinedRandomWalk): - r"""Random Walk with Multivariate Normal innovations + r"""Random Walk with Multivariate Normal innovations. Parameters ---------- @@ -361,7 +361,7 @@ def get_dists(cls, mu, *, cov=None, tau=None, chol=None, lower=True, init_dist=N class MvStudentTRandomWalk(PredefinedRandomWalk): - r"""Multivariate Random Walk with StudentT innovations + r"""Multivariate Random Walk with StudentT innovations. Parameters ---------- @@ -630,7 +630,7 @@ def dist( @classmethod def _get_ar_order(cls, rhos: TensorVariable, ar_order: int | None, constant: bool) -> int: - """Compute ar_order given inputs + """Compute ar_order given inputs. If ar_order is not specified we do constant folding on the shape of rhos to retrieve it. For example, this will detect that @@ -774,7 +774,7 @@ def update(self, node: Node): class GARCH11(Distribution): r""" - GARCH(1,1) with Normal innovations. The model is specified by + GARCH(1,1) with Normal innovations. The model is specified by. .. 
math:: y_t \sim N(0, \sigma_t^2) diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index 2c4e121b47f..92aa52afd02 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -69,7 +69,7 @@ def __getattr__(name): @singledispatch def _default_transform(op: Op, rv: TensorVariable): - """Return default transform for a given Distribution `Op`""" + """Return default transform for a given Distribution `Op`.""" return None @@ -141,7 +141,7 @@ def log_jac_det(self, value, *inputs): class CholeskyCovPacked(Transform): """ Transforms the diagonal elements of the LKJCholeskyCov distribution to be on the - log scale + log scale. """ name = "cholesky-cov-packed" diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index f0200b73688..051a57798e0 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -244,7 +244,7 @@ def __str__(self): class Truncated(Distribution): r""" - Truncated distribution + Truncated distribution. The pdf of a Truncated distribution is diff --git a/pymc/exceptions.py b/pymc/exceptions.py index 7caa2ac3e5a..f062be527cf 100644 --- a/pymc/exceptions.py +++ b/pymc/exceptions.py @@ -31,7 +31,7 @@ class IncorrectArgumentsError(ValueError): class TraceDirectoryError(ValueError): - """Error from trying to load a trace from an incorrectly-structured directory,""" + """Error from trying to load a trace from an incorrectly-structured directory.""" pass @@ -77,7 +77,7 @@ def __init__(self, message, actual=None, expected=None): class TruncationError(RuntimeError): - """Exception for errors generated from truncated graphs""" + """Exception for errors generated from truncated graphs.""" class NotConstantValueError(ValueError): diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index d97ce043d39..9d3017b163e 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -56,9 +56,7 @@ class BaseCovariance: - """ - Base class for kernels/covariance functions. - """ + """Base class for kernels/covariance functions.""" def __call__( self, @@ -116,9 +114,7 @@ def __pow__(self, other) -> "Exponentiated": return Exponentiated(self, other) def __array_wrap__(self, result): - """ - Required to allow radd/rmul by numpy arrays. - """ + """Required to allow radd/rmul by numpy arrays.""" result = np.squeeze(result) if len(result.shape) <= 1: result = result.reshape(1, 1) @@ -579,7 +575,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the ExpQuad kernel is: + Power spectral density for the ExpQuad kernel. .. math:: @@ -638,7 +634,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the Matern52 kernel is: + Power spectral density for the Matern52 kernel. .. math:: @@ -677,7 +673,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV def power_spectral_density(self, omega: TensorLike) -> TensorVariable: r""" - The power spectral density for the Matern32 kernel is: + Power spectral density for the Matern32 kernel. .. math:: @@ -702,7 +698,7 @@ def power_spectral_density(self, omega: TensorLike) -> TensorVariable: class Matern12(Stationary): r""" - The Matern kernel with nu = 1/2 + The Matern kernel with nu = 1/2. .. 
math:: @@ -788,7 +784,7 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV return pt.exp(-0.5 * r2) def power_spectral_density_approx(self, J: TensorLike) -> TensorVariable: - """ + r""" Technically, this is not a spectral density but these are the first `m` coefficients of the low rank approximation for the periodic kernel, which are used in the same way. `J` is a vector of `np.arange(m)`. diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 80dd632cac8..131b00c542f 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -59,9 +59,7 @@ def _handle_sigma_noise_parameters(sigma, noise): class Base: - R""" - Base class. - """ + """Base class.""" def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): self.mean_func = mean_func diff --git a/pymc/gp/mean.py b/pymc/gp/mean.py index 30a6fe244c5..800cbf55635 100644 --- a/pymc/gp/mean.py +++ b/pymc/gp/mean.py @@ -18,9 +18,7 @@ class Mean: - R""" - Base class for mean functions - """ + """Base class for mean functions.""" def __call__(self, X): R""" @@ -40,17 +38,14 @@ def __mul__(self, other): class Zero(Mean): - R""" - Zero mean function for Gaussian process. - - """ + """Zero mean function for Gaussian process.""" def __call__(self, X): return pt.alloc(0.0, X.shape[0]) class Constant(Mean): - R""" + """ Constant mean function for Gaussian process. Parameters @@ -68,7 +63,7 @@ def __call__(self, X): class Linear(Mean): - R""" + """ Linear mean function for Gaussian process. Parameters diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 734d36ed324..53ba91aa9e7 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -174,7 +174,7 @@ def plot_gp_dist( fill_kwargs=None, samples_kwargs=None, ): - """A helper function for plotting 1D GP posteriors from trace + """A helper function for plotting 1D GP posteriors from trace. Parameters ---------- diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 37bcc92a68e..b46d13cb55b 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -37,7 +37,7 @@ def convert_str_to_rv_dict( ) -> dict[TensorVariable, np.ndarray | Variable | str | None]: """Helper function for converting a user-provided start dict with str keys of (transformed) variable names to a dict mapping the RV tensors to untransformed initvals. - TODO: Deprecate this functionality and only accept TensorVariables as keys + TODO: Deprecate this functionality and only accept TensorVariables as keys. """ initvals = {} for key, initval in start.items(): @@ -59,7 +59,7 @@ def make_initial_point_fns_per_chain( jitter_rvs: set[TensorVariable] | None = None, chains: int, ) -> list[Callable]: - """Create an initial point function for each chain, as defined by initvals + """Create an initial point function for each chain, as defined by initvals. If a single initval dictionary is passed, the function is replicated for each chain, otherwise a unique function is compiled for each entry in the dictionary. 
diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index 097bf84afa5..927cd50cf2d 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -144,14 +144,14 @@ def _icdf_helper(rv, value, **kwargs): class MeasurableOp(abc.ABC): - """An operation whose outputs can be assigned a measure/log-probability""" + """An operation whose outputs can be assigned a measure/log-probability.""" MeasurableOp.register(RandomVariable) class MeasurableElemwise(MeasurableOp, Elemwise): - """Base class for Measurable Elemwise variables""" + """Base class for Measurable Elemwise variables.""" valid_scalar_types: tuple[MetaType, ...] = () diff --git a/pymc/logprob/censoring.py b/pymc/logprob/censoring.py index 248c285ba5f..2104ecb6ef2 100644 --- a/pymc/logprob/censoring.py +++ b/pymc/logprob/censoring.py @@ -90,7 +90,7 @@ def find_measurable_clips(fgraph: FunctionGraph, node: Node) -> list[TensorVaria @_logprob.register(MeasurableClip) def clip_logprob(op, values, base_rv, lower_bound, upper_bound, **kwargs): - r"""Logprob of a clipped censored distribution + r"""Logprob of a clipped censored distribution. The probability is given by .. math:: @@ -174,7 +174,7 @@ def find_measurable_roundings(fgraph: FunctionGraph, node: Node) -> list[TensorV @_logprob.register(MeasurableRound) def round_logprob(op, values, base_rv, **kwargs): - r"""Logprob of a rounded censored distribution + r"""Logprob of a rounded censored distribution. The probability of a distribution rounded to the nearest integer is given by .. math:: diff --git a/pymc/logprob/order.py b/pymc/logprob/order.py index 51833a128bc..6eceb819dd8 100644 --- a/pymc/logprob/order.py +++ b/pymc/logprob/order.py @@ -61,7 +61,7 @@ class MeasurableMax(MeasurableOp, Max): class MeasurableMaxDiscrete(MeasurableOp, Max): - """A placeholder used to specify a log-likelihood for sub-graphs of maxima of discrete variables""" + """A placeholder used to specify a log-likelihood for sub-graphs of maxima of discrete variables.""" @node_rewriter([Max]) diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index dee5e907bfb..4ae78fc7809 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -137,7 +137,7 @@ def backward( self, value: TensorVariable, *inputs: Variable ) -> TensorVariable | tuple[TensorVariable, ...]: """Invert the transformation. Multiple values may be returned when the - transformation is not 1-to-1 + transformation is not 1-to-1. 
""" def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: @@ -158,7 +158,7 @@ def __str__(self): class MeasurableTransform(MeasurableElemwise): - """A placeholder used to specify a log-likelihood for a transformed measurable variable""" + """A placeholder used to specify a log-likelihood for a transformed measurable variable.""" valid_scalar_types = ( Exp, @@ -371,7 +371,7 @@ def measurable_neg_to_product(fgraph, node): @node_rewriter([sub]) def measurable_sub_to_neg(fgraph, node): - """Convert subtraction involving `MeasurableVariable`s to addition with neg""" + """Convert subtraction involving `MeasurableVariable`s to addition with neg.""" if not filter_measurable_variables(node.inputs): return None diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index 10aa3c8bb96..93353a86c31 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -199,7 +199,7 @@ def expand_fn(var): class ParameterValueError(ValueError): - """Exception for invalid parameters values in logprob graphs""" + """Exception for invalid parameters values in logprob graphs.""" class CheckParameterValue(CheckAndRaise): @@ -220,7 +220,7 @@ def __str__(self): @node_rewriter(tracks=[CheckParameterValue]) def local_remove_check_parameter(fgraph, node): - """Rewrite that removes CheckParameterValue + """Rewrite that removes CheckParameterValue. This is used when compile_rv_inplace """ diff --git a/pymc/math.py b/pymc/math.py index b5fc50a8eb4..9ffe8a6e8fe 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -185,7 +185,7 @@ def kronecker(*Ks): r"""Return the Kronecker product of arguments: - :math:`K_1 \otimes K_2 \otimes ... \otimes K_D` + :math:`K_1 \otimes K_2 \otimes ... \otimes K_D`. Parameters ---------- @@ -219,7 +219,7 @@ def cartesian(*arrays): def kron_matrix_op(krons, m, op): - r"""Apply op to krons and m in a way that reproduces ``op(kronecker(*krons), m)`` + r"""Apply op to krons and m in a way that reproduces ``op(kronecker(*krons), m)``. Parameters ---------- @@ -275,12 +275,12 @@ def kron_diag(*diags): def logdiffexp(a, b): - """log(exp(a) - exp(b))""" + """Return log(exp(a) - exp(b)).""" return a + pt.log1mexp(b - a) def logdiffexp_numpy(a, b): - """log(exp(a) - exp(b))""" + """Return log(exp(a) - exp(b)).""" warnings.warn( "pymc.math.logdiffexp_numpy is being deprecated.", FutureWarning, @@ -331,6 +331,7 @@ def log1mexp(x, *, negative_input=False): def log1mexp_numpy(x, *, negative_input=False): """Return log(1 - exp(x)). This function is numerically more stable than the naive approach. + For details, see https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf """ @@ -460,9 +461,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): class BatchedDiag(Op): - """ - Fast BatchedDiag allocation - """ + """Fast BatchedDiag allocation.""" __props__ = () @@ -510,7 +509,7 @@ def batched_diag(C): def block_diagonal(matrices, sparse=False, format="csr"): r"""See pt.slinalg.block_diag or - pytensor.sparse.basic.block_diag for reference + pytensor.sparse.basic.block_diag for reference. Parameters ---------- diff --git a/pymc/model/core.py b/pymc/model/core.py index 38ee3de4861..81fc4fa13c6 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -854,7 +854,7 @@ def d2logp( @property def datalogp(self) -> Variable: """PyTensor scalar of log-probability of the observed variables and - potential terms + potential terms. 
""" return self.observedlogp + self.potentiallogp @@ -874,12 +874,12 @@ def varlogp_nojac(self) -> Variable: @property def observedlogp(self) -> Variable: - """PyTensor scalar of log-probability of the observed variables""" + """PyTensor scalar of log-probability of the observed variables.""" return self.logp(vars=self.observed_RVs) @property def potentiallogp(self) -> Variable: - """PyTensor scalar of log-probability of the Potential terms""" + """PyTensor scalar of log-probability of the Potential terms.""" # Convert random variables in Potential expression into their log-likelihood # inputs and apply their transforms, if any potentials = self.replace_rvs_by_values(self.potentials) @@ -899,7 +899,7 @@ def value_vars(self): def unobserved_value_vars(self): """List of all random variables (including untransformed projections), as well as deterministics used as inputs and outputs of the model's - log-likelihood graph + log-likelihood graph. """ vars = [] transformed_rvs = [] @@ -920,12 +920,12 @@ def unobserved_value_vars(self): @property def discrete_value_vars(self): - """All the discrete value variables in the model""" + """All the discrete value variables in the model.""" return list(typefilter(self.value_vars, discrete_types)) @property def continuous_value_vars(self): - """All the continuous value variables in the model""" + """All the continuous value variables in the model.""" return list(typefilter(self.value_vars, continuous_types)) @property @@ -1550,7 +1550,7 @@ def prefix(self) -> str: return name def name_for(self, name): - """Checks if name has prefix and adds if needed""" + """Checks if name has prefix and adds if needed.""" name = self._validate_name(name) if self.prefix: if not name.startswith(self.prefix + "::"): @@ -1561,7 +1561,7 @@ def name_for(self, name): return name def name_of(self, name): - """Checks if name has prefix and deletes if needed""" + """Checks if name has prefix and deletes if needed.""" name = self._validate_name(name) if not self.prefix or not name: return name @@ -1590,7 +1590,7 @@ def __deepcopy__(self, _): def copy(self): """ - Clone the model + Clone the model. To access variables in the cloned model use `cloned_model["var_name"]`. @@ -1672,7 +1672,7 @@ def compile_fn( point_fn: bool = True, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function + """Compiles an PyTensor function. Parameters ---------- @@ -1784,7 +1784,7 @@ def eval_rv_shapes(self) -> dict[str, tuple[int, ...]]: def check_start_vals(self, start, **kwargs): r"""Check that the starting values for MCMC do not cause the relevant log probability - to evaluate to something invalid (e.g. Inf or NaN) + to evaluate to something invalid (e.g. Inf or NaN). Parameters ---------- @@ -2094,7 +2094,7 @@ def to_graphviz( class BlockModelAccess(Model): - """Can be used to prevent user access to Model contexts""" + """Can be used to prevent user access to Model contexts.""" def __init__(self, *args, error_msg_on_access="Model access is blocked", **kwargs): self.error_msg_on_access = error_msg_on_access @@ -2189,7 +2189,7 @@ def compile_fn( model: Model | None = None, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function + """Compiles an PyTensor function. 
Parameters ---------- diff --git a/pymc/model/transform/conditioning.py b/pymc/model/transform/conditioning.py index 9bcdce7c4b7..23e0175503b 100644 --- a/pymc/model/transform/conditioning.py +++ b/pymc/model/transform/conditioning.py @@ -225,7 +225,7 @@ def change_value_transforms( model: Model, vars_to_transforms: Mapping[ModelVariable, Transform | None], ) -> Model: - """Change the value variables transforms in the model + r"""Change the value variables transforms in the model. Parameters ---------- @@ -309,7 +309,7 @@ def remove_value_transforms( model: Model, vars: Sequence[ModelVariable] | None = None, ) -> Model: - """Remove the value variables transforms in the model + r"""Remove the value variables transforms in the model. Parameters ---------- diff --git a/pymc/model_graph.py b/pymc/model_graph.py index 6596477261e..bacf38b9178 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -226,7 +226,7 @@ def _make_node( cluster: str | None = None, formatting: str = "plain", ): - """Attaches the given variable to a graphviz or networkx Digraph""" + """Attaches the given variable to a graphviz or networkx Digraph.""" node_formatter = node_formatters[node.node_type] kwargs = node_formatter(node.var) @@ -311,7 +311,7 @@ def vars_to_plot(self, var_names: Iterable[VarName] | None = None) -> list[VarNa def make_compute_graph( self, var_names: Iterable[VarName] | None = None ) -> dict[VarName, set[VarName]]: - """Get map of var_name -> set(input var names) for the model""" + """Get map of var_name -> set(input var names) for the model.""" input_map: dict[VarName, set[VarName]] = defaultdict(set) for var_name in self.vars_to_plot(var_names): @@ -441,7 +441,7 @@ def make_graph( node_formatters: NodeTypeFormatterMapping | None = None, create_plate_label: PlateLabelFunc = create_plate_label_with_dim_length, ): - """Make graphviz Digraph of PyMC model + """Make graphviz Digraph of PyMC model. Returns ------- @@ -511,7 +511,7 @@ def make_networkx( node_formatters: NodeTypeFormatterMapping | None = None, create_plate_label: PlateLabelFunc = create_plate_label_with_dim_length, ): - """Make networkx Digraph of PyMC model + """Make networkx Digraph of PyMC model. Returns ------- diff --git a/pymc/ode/ode.py b/pymc/ode/ode.py index 7ecb4deba82..26584d823c5 100644 --- a/pymc/ode/ode.py +++ b/pymc/ode/ode.py @@ -32,7 +32,7 @@ class DifferentialEquation(Op): r""" - Specify an ordinary differential equation + Specify an ordinary differential equation. Due to the nature of the model (as well as included solvers), the process of ODE solution may perform slowly. A faster alternative library based on PyMC--sunode--has implemented Adams' method and BDF (backward differentation formula). More information about sunode is available at: https://github.com/aseyboldt/sunode. diff --git a/pymc/ode/utils.py b/pymc/ode/utils.py index 5ccd596b18c..fbe4ba97ab8 100644 --- a/pymc/ode/utils.py +++ b/pymc/ode/utils.py @@ -19,7 +19,7 @@ def make_sens_ic(n_states, n_theta, floatX): r""" - The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta) + The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta). 
If the first n_states entries of the parameters vector in the simulate call correspond to initial conditions of the system, diff --git a/pymc/printing.py b/pymc/printing.py index b53049950b7..f90f7564a3b 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -303,7 +303,5 @@ def _default_repr_pretty(obj: TensorVariable | Model, p, cycle): def _format_underscore(variable: str) -> str: - """ - Escapes all unescaped underscores in the variable name for LaTeX representation. - """ + """Escapes all unescaped underscores in the variable name for LaTeX representation.""" return re.sub(r"(? list[RandomGeneratorSharedVariable]: - """Return shared RNG variables in a graph""" + """Return shared RNG variables in a graph.""" return [ node for node in graph_inputs(variables) if isinstance(node, RandomGeneratorSharedVariable) ] def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> list[TensorVariable]: - """Replace any RNG nodes upstream of outputs by new RNGs of the same type + """Replace any RNG nodes upstream of outputs by new RNGs of the same type. This can be used when combining a pre-existing graph with a cloned one, to ensure RNGs are unique across the two graphs. @@ -809,7 +803,7 @@ def reseed_rngs( rngs: Sequence[SharedVariable], seed: SeedSequenceSeed, ) -> None: - """Create a new set of RandomState/Generator for each rng based on a seed""" + """Create a new set of RandomState/Generator for each rng based on a seed.""" bit_generators = [ np.random.PCG64(sub_seed) for sub_seed in np.random.SeedSequence(seed).spawn(len(rngs)) ] @@ -1142,7 +1136,7 @@ def toposort_replace( def normalize_rng_param(rng: None | Variable) -> Variable: - """Validate rng is a valid type or create a new one if None""" + """Validate rng is a valid type or create a new one if None.""" if rng is None: rng = pytensor.shared(np.random.default_rng()) elif not isinstance(rng.type, RandomType): diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index 5d673030375..ce00ab74d30 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -75,7 +75,7 @@ def get_constant_coords(trace_coords: dict[str, np.ndarray], model: Model) -> set: - """Get the set of coords that have remained constant between the trace and model""" + """Get the set of coords that have remained constant between the trace and model.""" constant_coords = set() for dim, coord in trace_coords.items(): current_coord = model.coords.get(dim, None) @@ -284,7 +284,7 @@ def draw( random_seed: RandomState = None, **kwargs, ) -> np.ndarray | list[np.ndarray]: - """Draw samples for one variable or a list of variables + """Draw samples for one variable or a list of variables. Parameters ---------- @@ -346,7 +346,7 @@ def draw( def observed_dependent_deterministics(model: Model): - """Find deterministics that depend directly on observed variables""" + """Find deterministics that depend directly on observed variables.""" deterministics = model.deterministics observed_rvs = set(model.observed_RVs) blockers = model.basic_RVs diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index 809a223c069..d38a77d941d 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -92,7 +92,7 @@ def posdefmatrix_fn(value, *inps): def _replace_shared_variables(graph: list[TensorVariable]) -> list[TensorVariable]: - """Replace shared variables in graph by their constant values + """Replace shared variables in graph by their constant values. 
Raises ------ @@ -122,7 +122,7 @@ def get_jaxified_graph( inputs: list[TensorVariable] | None = None, outputs: list[TensorVariable] | None = None, ) -> list[TensorVariable]: - """Compile an PyTensor graph into an optimized JAX function""" + """Compile an PyTensor graph into an optimized JAX function.""" graph = _replace_shared_variables(outputs) if outputs is not None else None fgraph = FunctionGraph(inputs=inputs, outputs=graph, clone=True) @@ -162,7 +162,7 @@ def _get_log_likelihood( backend: Literal["cpu", "gpu"] | None = None, postprocessing_vectorize: Literal["vmap", "scan"] = "scan", ) -> dict: - """Compute log-likelihood for all observations""" + """Compute log-likelihood for all observations.""" elemwise_logp = model.logp(model.observed_RVs, sum=False) jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=elemwise_logp) result = _postprocess_samples( @@ -214,7 +214,7 @@ def _get_batched_jittered_initial_points( jitter: bool = True, jitter_max_retries: int = 10, ) -> np.ndarray | list[np.ndarray]: - """Get jittered initial point in format expected by NumPyro MCMC kernel + """Get jittered initial point in format expected by NumPyro MCMC kernel. Returns ------- diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index 5e7c9344545..f6e8d9d4cc5 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -254,7 +254,7 @@ def _print_step_hierarchy(s: Step, level: int = 0) -> None: def all_continuous(vars): - """Check that vars not include discrete variables""" + """Check that vars not include discrete variables.""" if any((var.dtype in discrete_types) for var in vars): return False else: @@ -1126,7 +1126,7 @@ def _iter_sample( model: Model | None = None, callback: SamplingIteratorCallback | None = None, ) -> Iterator[bool]: - """Generator for sampling one chain. (Used in singleprocess sampling.) + """Generator for sampling one chain. (Used in singleprocess sampling.). Parameters ---------- diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index e55c6a50f27..60dcf033a0b 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -186,7 +186,7 @@ def __init__( self.weights = np.ones(self.draws) / self.draws def initialize_population(self) -> dict[str, np.ndarray]: - """Create an initial population from the prior distribution""" + """Create an initial population from the prior distribution.""" sys.stdout.write(" ") # see issue #5828 with warnings.catch_warnings(): warnings.filterwarnings( @@ -212,7 +212,7 @@ def initialize_population(self) -> dict[str, np.ndarray]: return cast(dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): - """Create variables and logp function necessary to run SMC kernel + """Create variables and logp function necessary to run SMC kernel. This method should not be overwritten. If needed, use `setup_kernel` instead. @@ -252,11 +252,11 @@ def _initialize_kernel(self): self.likelihood_logp = np.array(likelihoods).squeeze() def setup_kernel(self): - """Setup logic performed once before sampling starts""" + """Setup logic performed once before sampling starts.""" pass def update_beta_and_weights(self): - """Calculate the next inverse temperature (beta) + """Calculate the next inverse temperature (beta). The importance weights based on two successive tempered likelihoods (i.e. two successive values of beta) and updates the marginal likelihood estimate. 
@@ -293,7 +293,7 @@ def update_beta_and_weights(self): self.log_marginal_likelihood += logsumexp(log_weights_un) - np.log(self.draws) def resample(self): - """Resample particles based on importance weights""" + """Resample particles based on importance weights.""" self.resampling_indexes = systematic_resampling(self.weights, self.rng) self.tempered_posterior = self.tempered_posterior[self.resampling_indexes] @@ -303,16 +303,16 @@ def resample(self): self.tempered_posterior_logp = self.prior_logp + self.likelihood_logp * self.beta def tune(self): - """Tuning logic performed before every mutation step""" + """Tuning logic performed before every mutation step.""" pass @abc.abstractmethod def mutate(self): - """Apply kernel-specific perturbation to the particles once per stage""" + """Apply kernel-specific perturbation to the particles once per stage.""" pass def sample_stats(self) -> SMCStats: - """Stats to be saved at the end of each stage + """Stats to be saved at the end of each stage. These stats will be saved under `sample_stats` in the final InferenceData object. """ @@ -333,7 +333,7 @@ def sample_settings(self) -> SMCSettings: } def _posterior_to_trace(self, chain=0) -> NDArray: - """Save results into a PyMC trace + """Save results into a PyMC trace. This method should not be overwritten. """ @@ -360,7 +360,7 @@ def _posterior_to_trace(self, chain=0) -> NDArray: class IMH(SMC_KERNEL): - """Independent Metropolis-Hastings SMC_kernel""" + """Independent Metropolis-Hastings SMC_kernel.""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ @@ -466,7 +466,7 @@ def get(self, b): class MH(SMC_KERNEL): - """Metropolis-Hastings SMC_kernel""" + """Metropolis-Hastings SMC_kernel.""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ @@ -489,7 +489,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): def setup_kernel(self): """Proposal dist is just a Multivariate Normal with unit identity covariance. - Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()` + Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()`. """ ndim = self.tempered_posterior.shape[1] self.proposal_scales = np.full(self.draws, min(1, 2.38**2 / ndim)) @@ -501,7 +501,7 @@ def resample(self): self.chain_acc_rate = self.chain_acc_rate[self.resampling_indexes] def tune(self): - """Update proposal scales for each particle dimension and update number of MH steps""" + """Update proposal scales for each particle dimension and update number of MH steps.""" if self.iteration > 1: # Rescale based on distance to 0.234 acceptance rate chain_scales = np.exp(np.log(self.proposal_scales) + (self.chain_acc_rate - 0.234)) diff --git a/pymc/stats/log_density.py b/pymc/stats/log_density.py index 4feea19ca13..266ceaac1fc 100644 --- a/pymc/stats/log_density.py +++ b/pymc/stats/log_density.py @@ -38,7 +38,7 @@ def compute_log_likelihood( progressbar=True, compile_kwargs: dict[str, Any] | None = None, ): - """Compute elemwise log_likelihood of model given InferenceData with posterior group + """Compute elemwise log_likelihood of model given InferenceData with posterior group. Parameters ---------- @@ -81,7 +81,7 @@ def compute_log_prior( progressbar=True, compile_kwargs=None, ): - """Compute elemwise log_prior of model given InferenceData with posterior group + """Compute elemwise log_prior of model given InferenceData with posterior group. 
Parameters ---------- @@ -127,7 +127,7 @@ def compute_log_density( compile_kwargs=None, ) -> InferenceData | Dataset: """ - Compute elemwise log_likelihood or log_prior of model given InferenceData with posterior group + Compute elemwise log_likelihood or log_prior of model given InferenceData with posterior group. Parameters ---------- diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py index 1d87297efd8..700c7afa6b4 100644 --- a/pymc/step_methods/compound.py +++ b/pymc/step_methods/compound.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 7, 2011 +Created on Mar 7, 2011. @author: johnsalvatier """ @@ -40,6 +40,7 @@ @unique class Competence(IntEnum): """Enum for characterizing competence classes of step methods. + Values include: 0: INCOMPATIBLE 1: COMPATIBLE diff --git a/pymc/step_methods/hmc/quadpotential.py b/pymc/step_methods/hmc/quadpotential.py index 05da188f9b3..53185bbb857 100644 --- a/pymc/step_methods/hmc/quadpotential.py +++ b/pymc/step_methods/hmc/quadpotential.py @@ -842,7 +842,7 @@ class WeightedCovarianceState(DataClassState): class _WeightedCovariance(WithSamplingState): - """Online algorithm for computing mean and covariance + """Online algorithm for computing mean and covariance. This implements the `Welford's algorithm `_ based diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index ea5db3fbb91..a9d23d5ceb3 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -134,7 +134,7 @@ class MetropolisState(StepMethodState): class Metropolis(ArrayStepShared): - """Metropolis-Hastings sampling step""" + """Metropolis-Hastings sampling step.""" name = "metropolis" @@ -161,7 +161,7 @@ def __init__( rng=None, **kwargs, ): - """Create an instance of a Metropolis stepper + """Create an instance of a Metropolis stepper. Parameters ---------- @@ -324,7 +324,7 @@ def competence(var, has_grad): def tune(scale, acc_rate): """ Tunes the scaling parameter for the proposal distribution - according to the acceptance rate over the last tune_interval: + according to the acceptance rate over the last tune_interval. Rate Variance adaptation ---- ------------------- @@ -380,7 +380,7 @@ class BinaryMetropolisState(StepMethodState): class BinaryMetropolis(ArrayStep): - """Metropolis-Hastings optimized for binary variables + """Metropolis-Hastings optimized for binary variables. Parameters ---------- @@ -489,7 +489,7 @@ class BinaryGibbsMetropolisState(StepMethodState): class BinaryGibbsMetropolis(ArrayStep): - """A Metropolis-within-Gibbs step method optimized for binary variables + """A Metropolis-within-Gibbs step method optimized for binary variables. Parameters ---------- diff --git a/pymc/testing.py b/pymc/testing.py index e9ded1824ea..b33e1cef2b5 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -311,7 +311,7 @@ def check_logp( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logp methods + Generic test for PyMC logp methods. Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar results for valid values and parameters inside the supported edges. @@ -421,7 +421,7 @@ def check_logcdf( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logcdf methods + Generic test for PyMC logcdf methods. The following tests are performed by default: 1. 
Test PyMC logcdf and equivalent scipy logcdf methods give similar @@ -536,7 +536,7 @@ def check_icdf( n_samples: int = 100, ) -> None: """ - Generic test for PyMC icdf methods + Generic test for PyMC icdf methods. The following tests are performed by default: 1. Test PyMC icdf and equivalent scipy icdf (ppf) methods give similar @@ -633,9 +633,7 @@ def check_selfconsistency_discrete_logcdf( decimal: int | None = None, n_samples: int = 100, ) -> None: - """ - Check that logcdf of discrete distributions matches sum of logps up to value. - """ + """Check that logcdf of discrete distributions matches sum of logps up to value.""" if decimal is None: decimal = select_by_precision(float64=6, float32=3) diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index df3cf071c21..459b3a40aa7 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -109,7 +109,7 @@ def eig_recompose(val, vec): def trace_cov(trace, vars=None, model=None): """ - Calculate the flattened covariance matrix using a sample trace + Calculate the flattened covariance matrix using a sample trace. Useful if you want to base your covariance matrix for further sampling on some initial samples. diff --git a/pymc/tuning/starting.py b/pymc/tuning/starting.py index cb8ae010d75..5f0bcbb8546 100644 --- a/pymc/tuning/starting.py +++ b/pymc/tuning/starting.py @@ -13,7 +13,7 @@ # limitations under the License. """ -Created on Mar 12, 2011 +Created on Mar 12, 2011. @author: johnsalvatier """ diff --git a/pymc/util.py b/pymc/util.py index 7733d41b608..c05bbcde5eb 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -71,7 +71,7 @@ def __repr__(self): def withparent(meth): - """Helper wrapper that passes calls to parent's instance""" + """Helper wrapper that passes calls to parent's instance.""" def wrapped(self, *args, **kwargs): res = meth(self, *args, **kwargs) @@ -89,7 +89,7 @@ def wrapped(self, *args, **kwargs): class treelist(list): """A list that passes mutable extending operations used in Model to parent list instance. - Extending treelist you will also extend its parent + Extending treelist you will also extend its parent. """ def __init__(self, iterable=(), parent=None): @@ -134,7 +134,7 @@ def __imul__(self, other) -> "treelist": class treedict(dict): """A dict that passes mutable extending operations used in Model to parent dict instance. - Extending treedict you will also extend its parent + Extending treedict you will also extend its parent. """ def __init__(self, iterable=(), parent=None, **kwargs): @@ -160,7 +160,7 @@ def tree_contains(self, item): def get_transformed_name(name, transform): r""" - Consistent way of transforming names + Consistent way of transforming names. Parameters ---------- @@ -179,7 +179,7 @@ def get_transformed_name(name, transform): def is_transformed_name(name): r""" - Quickly check if a name was transformed with `get_transformed_name` + Quickly check if a name was transformed with `get_transformed_name`. Parameters ---------- @@ -196,7 +196,7 @@ def is_transformed_name(name): def get_untransformed_name(name): r""" - Undo transformation in `get_transformed_name`. Throws ValueError if name wasn't transformed + Undo transformation in `get_transformed_name`. Throws ValueError if name wasn't transformed. 
Parameters ---------- @@ -369,7 +369,7 @@ def cf(self): def check_dist_not_registered(dist, model=None): - """Check that a dist is not registered in the model already""" + """Check that a dist is not registered in the model already.""" from pymc.model import modelcontext try: @@ -531,7 +531,7 @@ def makeiter(a): class CustomProgress(Progress): - """A child of Progress that allows to disable progress bars and its container + """A child of Progress that allows to disable progress bars and its container. The implementation simply checks an `is_enabled` flag and generates the progress bar only if it's `True`. diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index 1f42e552855..1800d9a6aa2 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -43,7 +43,7 @@ class MeanFieldGroup(Group): R"""Mean Field approximation to the posterior where spherical Gaussian family is fitted to minimize KL divergence from True posterior. It is assumed that latent space variables are uncorrelated that is the main drawback - of the method + of the method. """ __param_spec__ = {"mu": ("d",), "rho": ("d",)} @@ -189,7 +189,7 @@ def symbolic_random(self): @Group.register class EmpiricalGroup(Group): """Builds Approximation instance from a given trace, - it has the same interface as variational approximation + it has the same interface as variational approximation. """ has_logq = False @@ -330,7 +330,7 @@ def sample_approx(approx, draws=100, include_transformed=True): # single group shortcuts exported to user class SingleGroupApproximation(Approximation): - """Base class for Single Group Approximation""" + """Base class for Single Group Approximation.""" _group_class: type | None = None diff --git a/pymc/variational/callbacks.py b/pymc/variational/callbacks.py index 820e9d7b847..5e3393fb690 100644 --- a/pymc/variational/callbacks.py +++ b/pymc/variational/callbacks.py @@ -43,7 +43,7 @@ def absolute(current: np.ndarray, prev: np.ndarray) -> np.ndarray: class CheckParametersConvergence(Callback): - """Convergence stopping check + """Convergence stopping check. Parameters ---------- @@ -93,7 +93,7 @@ def flatten_shared(shared_list): class Tracker(Callback): """ - Helper class to record arbitrary stats during VI + Helper class to record arbitrary stats during VI. It is possible to pass a function that takes no arguments If call fails then (approx, hist, i) are passed diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index 9a400bb1e6e..3dcb59b5910 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -45,7 +45,7 @@ class Inference: - r"""**Base class for Variational Inference** + r"""**Base class for Variational Inference**. Communicates Operator, Approximation and Test Function to build Objective Function @@ -101,7 +101,7 @@ def fit( progressbar_theme=default_progress_theme, **kwargs, ): - """Perform Operator Variational Inference + """Perform Operator Variational Inference. 
Parameters ---------- @@ -206,7 +206,7 @@ def _iterate_without_loss(self, s, n, step_func, progressbar, progressbar_theme, def _iterate_with_loss(self, s, n, step_func, progressbar, progressbar_theme, callbacks): def _infmean(input_array): - """Return the mean of the finite values of the array""" + """Return the mean of the finite values of the array.""" input_array = input_array[np.isfinite(input_array)].astype("float64") if len(input_array) == 0: return np.nan @@ -285,7 +285,7 @@ def _infmean(input_array): return State(i + s, step=step_func, callbacks=callbacks, score=True) def refine(self, n, progressbar=True, progressbar_theme=default_progress_theme): - """Refine the solution using the last compiled step function""" + """Refine the solution using the last compiled step function.""" if self.state is None: raise TypeError("Need to call `.fit` first") i, step, callbacks, score = self.state @@ -299,7 +299,7 @@ def refine(self, n, progressbar=True, progressbar_theme=default_progress_theme): class KLqp(Inference): - r"""**Kullback Leibler Divergence Inference** + r"""**Kullback Leibler Divergence Inference**. General approach to fit Approximations that define :math:`logq` by maximizing ELBO (Evidence Lower Bound). In some cases @@ -328,7 +328,7 @@ def __init__(self, approx, beta=1.0): class ADVI(KLqp): - r"""**Automatic Differentiation Variational Inference (ADVI)** + r"""**Automatic Differentiation Variational Inference (ADVI)**. This class implements the meanfield ADVI, where the variational posterior distribution is assumed to be spherical Gaussian without @@ -472,7 +472,7 @@ def __init__(self, *args, **kwargs): class FullRankADVI(KLqp): - r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)** + r"""**Full Rank Automatic Differentiation Variational Inference (ADVI)**. Parameters ---------- @@ -501,7 +501,7 @@ def __init__(self, *args, **kwargs): class ImplicitGradient(Inference): - """**Implicit Gradient for Variational Inference** + """**Implicit Gradient for Variational Inference**. **not suggested to use** @@ -517,7 +517,7 @@ def __init__(self, approx, estimator=KSD, kernel=test_functions.rbf, **kwargs): class SVGD(ImplicitGradient): - r"""**Stein Variational Gradient Descent** + r"""**Stein Variational Gradient Descent**. This inference is based on Kernelized Stein Discrepancy it's main idea is to move initial noisy particles so that @@ -585,7 +585,7 @@ def __init__( class ASVGD(ImplicitGradient): - r"""**Amortized Stein Variational Gradient Descent** + r"""**Amortized Stein Variational Gradient Descent**. **not suggested to use** @@ -679,7 +679,7 @@ def fit( inf_kwargs=None, **kwargs, ): - r"""Handy shortcut for using inference methods in functional way + r"""Handy shortcut for using inference methods in functional way. 
Parameters ---------- diff --git a/pymc/variational/minibatch_rv.py b/pymc/variational/minibatch_rv.py index 21c16ed2a1c..f3ecccf2952 100644 --- a/pymc/variational/minibatch_rv.py +++ b/pymc/variational/minibatch_rv.py @@ -25,7 +25,7 @@ class MinibatchRandomVariable(MeasurableOp, Op): - """RV whose logprob should be rescaled to match total_size""" + """RV whose logprob should be rescaled to match total_size.""" __props__ = () view_map = {0: [0]} diff --git a/pymc/variational/operators.py b/pymc/variational/operators.py index f6ef0957234..fc1226be1fc 100644 --- a/pymc/variational/operators.py +++ b/pymc/variational/operators.py @@ -32,7 +32,7 @@ class KL(Operator): - R"""**Operator based on Kullback Leibler Divergence** + R"""**Operator based on Kullback Leibler Divergence**. This operator constructs Evidence Lower Bound (ELBO) objective @@ -67,7 +67,7 @@ def apply(self, f): class KSDObjective(ObjectiveFunction): - R"""Helper class for construction loss and updates for variational inference + R"""Helper class for construction loss and updates for variational inference. Parameters ---------- @@ -104,7 +104,7 @@ def __call__(self, nmc, **kwargs) -> list[Variable]: class KSD(Operator): - R"""**Operator based on Kernelized Stein Discrepancy** + R"""**Operator based on Kernelized Stein Discrepancy**. Input: A target distribution with density function :math:`p(x)` and a set of initial particles :math:`\{x^0_i\}^n_{i=1}` diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index a66c87f7d5f..03432e7cc58 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -91,27 +91,27 @@ class VariationalInferenceError(Exception): - """Exception for VI specific cases""" + """Exception for VI specific cases.""" class NotImplementedInference(VariationalInferenceError, NotImplementedError): - """Marking non functional parts of code""" + """Marking non functional parts of code.""" class ExplicitInferenceError(VariationalInferenceError, TypeError): - """Exception for bad explicit inference""" + """Exception for bad explicit inference.""" class AEVBInferenceError(VariationalInferenceError, TypeError): - """Exception for bad aevb inference""" + """Exception for bad aevb inference.""" class ParametrizationError(VariationalInferenceError, ValueError): - """Error raised in case of bad parametrization""" + """Error raised in case of bad parametrization.""" class GroupError(VariationalInferenceError, TypeError): - """Error related to VI groups""" + """Error related to VI groups.""" def _known_scan_ignored_inputs(terms): @@ -142,7 +142,7 @@ def inner(*args, **kwargs): def node_property(f): - """A shortcut for wrapping method to accessible tensor""" + """A shortcut for wrapping method to accessible tensor.""" if isinstance(f, str): def wrapper(fn): @@ -179,7 +179,7 @@ def try_to_set_test_value(node_in, node_out, s): class ObjectiveUpdates(pytensor.OrderedUpdates): - """OrderedUpdates extension for storing loss""" + """OrderedUpdates extension for storing loss.""" loss = None @@ -189,7 +189,7 @@ def _warn_not_used(smth, where): class ObjectiveFunction: - """Helper class for construction loss and updates for variational inference + """Helper class for construction loss and updates for variational inference. Parameters ---------- @@ -220,7 +220,7 @@ def updates( total_grad_norm_constraint=None, ): """Calculate gradients for objective function, test function and then - constructs updates for optimization step + constructs updates for optimization step. 
Parameters ---------- @@ -397,7 +397,7 @@ def step_function( def score_function( self, sc_n_mc=None, more_replacements=None, fn_kwargs=None ): # pragma: no cover - R"""Compile scoring function that operates which takes no inputs and returns Loss + R"""Compile scoring function that operates which takes no inputs and returns Loss. Parameters ---------- @@ -434,7 +434,7 @@ def __call__(self, nmc, **kwargs): class Operator: - R"""**Base class for Operator** + R"""**Base class for Operator**. Parameters ---------- @@ -473,7 +473,7 @@ def __init__(self, approx): model = property(lambda self: self.approx.model) def apply(self, f): # pragma: no cover - R"""Operator itself + R"""Operator itself. .. math:: @@ -514,7 +514,7 @@ def __str__(self): # pragma: no cover def collect_shared_to_list(params): """Helper function for getting a list from - usable representation of parameters + usable representation of parameters. Parameters ---------- @@ -561,7 +561,7 @@ def from_function(cls, f): class Group(WithMemoization): - R"""**Base class for grouping variables in VI** + R"""**Base class for grouping variables in VI**. Grouped Approximation is used for modelling mutual dependencies for a specified group of variables. Base for local and global group. @@ -777,7 +777,7 @@ def get_param_spec_for(cls, **kwargs): def _check_user_params(self, **kwargs): R"""*Dev* - checks user params, allocates them if they are correct, returns True. - If they are not present, returns False + If they are not present, returns False. Parameters ---------- @@ -807,7 +807,7 @@ def _check_user_params(self, **kwargs): return True def _initial_type(self, name): - R"""*Dev* - initial type with given name. The correct type depends on `self.batched` + R"""*Dev* - initial type with given name. The correct type depends on `self.batched`. Parameters ---------- @@ -821,7 +821,7 @@ def _initial_type(self, name): return pt.matrix(name) def _input_type(self, name): - R"""*Dev* - input type with given name. The correct type depends on `self.batched` + R"""*Dev* - input type with given name. The correct type depends on `self.batched`. Parameters ---------- @@ -874,7 +874,7 @@ def __init_group__(self, group): start_idx += size def _finalize_init(self): - """*Dev* - clean up after init""" + """*Dev* - clean up after init.""" del self._kwargs @property @@ -894,7 +894,7 @@ def params(self): return collect_shared_to_list(self.shared_params) def _new_initial_shape(self, size, dim, more_replacements=None): - """*Dev* - correctly proceeds sampling with variable batch size + """*Dev* - correctly proceeds sampling with variable batch size. Parameters ---------- @@ -920,7 +920,7 @@ def ddim(self): return sum(s.stop - s.start for _, s, _, _ in self.ordering.values()) def _new_initial(self, size, deterministic, more_replacements=None): - """*Dev* - allocates new initial random generator + """*Dev* - allocates new initial random generator. Parameters ---------- @@ -993,7 +993,7 @@ def set_size_and_deterministic( self, node: Variable | list[Variable], s, d: bool, more_replacements: dict | None = None ) -> Variable | list[Variable]: """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node + :func:`symbolic_single_sample` new random generator can be allocated and applied to node. 
Parameters ---------- @@ -1020,12 +1020,12 @@ def set_size_and_deterministic( return node_out def to_flat_input(self, node): - """*Dev* - replace vars with flattened view stored in `self.inputs`""" + """*Dev* - replace vars with flattened view stored in `self.inputs`.""" return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node): """*Dev* - performs sampling of node applying independent samples from posterior each time. - Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) @@ -1043,7 +1043,7 @@ def sample(post, *_): def symbolic_single_sample(self, node): """*Dev* - performs sampling of node applying single sample from posterior. Note that it is done symbolically and this node needs - :func:`set_size_and_deterministic` call with `size=1` + :func:`set_size_and_deterministic` call with `size=1`. """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) @@ -1051,7 +1051,7 @@ def symbolic_single_sample(self, node): def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag + sample size and deterministic flag. Parameters ---------- @@ -1081,7 +1081,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @node_property def symbolic_normalizing_constant(self): - """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`""" + """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`.""" t = self.to_flat_input( pt.max( [ @@ -1099,23 +1099,23 @@ def symbolic_normalizing_constant(self): def symbolic_logq_not_scaled(self): """*Dev* - symbolically computed logq for `self.symbolic_random` computations can be more efficient since all is known beforehand including - `self.symbolic_random` + `self.symbolic_random`. 
""" raise NotImplementedError # shape (s,) @node_property def symbolic_logq(self): - """*Dev* - correctly scaled `self.symbolic_logq_not_scaled`""" + """*Dev* - correctly scaled `self.symbolic_logq_not_scaled`.""" return self.symbolic_logq_not_scaled @node_property def logq(self): - """*Dev* - Monte Carlo estimate for group `logQ`""" + """*Dev* - Monte Carlo estimate for group `logQ`.""" return self.symbolic_logq.mean(0) @node_property def logq_norm(self): - """*Dev* - Monte Carlo estimate for group `logQ` normalized""" + """*Dev* - Monte Carlo estimate for group `logQ` normalized.""" return self.logq / self.symbolic_normalizing_constant def __str__(self): @@ -1127,17 +1127,17 @@ def __str__(self): @node_property def std(self) -> pt.TensorVariable: - """Standard deviation of the latent variables as an unstructured 1-dimensional tensor variable""" + """Standard deviation of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() @node_property def cov(self) -> pt.TensorVariable: - """Covariance between the latent variables as an unstructured 2-dimensional tensor variable""" + """Covariance between the latent variables as an unstructured 2-dimensional tensor variable.""" raise NotImplementedError() @node_property def mean(self) -> pt.TensorVariable: - """Mean of the latent variables as an unstructured 1-dimensional tensor variable""" + """Mean of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: @@ -1160,12 +1160,12 @@ def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: @property def mean_data(self) -> xarray.Dataset: - """Mean of the latent variables as an xarray Dataset""" + """Mean of the latent variables as an xarray Dataset.""" return self.var_to_data(self.mean) @property def std_data(self) -> xarray.Dataset: - """Standard deviation of the latent variables as an xarray Dataset""" + """Standard deviation of the latent variables as an xarray Dataset.""" return self.var_to_data(self.std) @@ -1174,7 +1174,7 @@ def std_data(self) -> xarray.Dataset: class Approximation(WithMemoization): - """**Wrapper for grouped approximations** + """**Wrapper for grouped approximations**. Wraps list of groups, creates an Approximation instance that collects sampled variables from all the groups, also collects logQ needed for @@ -1240,7 +1240,7 @@ def collect(self, item): @property def scale_cost_to_minibatch(self): - """*Dev* - Property to control scaling cost to minibatch""" + """*Dev* - Property to control scaling cost to minibatch.""" return bool(self._scale_cost_to_minibatch.get_value()) @scale_cost_to_minibatch.setter @@ -1250,7 +1250,7 @@ def scale_cost_to_minibatch(self, value): @node_property def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`. - Here the effect is controlled by `self.scale_cost_to_minibatch` + Here the effect is controlled by `self.scale_cost_to_minibatch`. 
""" t = pt.max( self.collect("symbolic_normalizing_constant") @@ -1265,22 +1265,22 @@ def symbolic_normalizing_constant(self): @node_property def symbolic_logq(self): - """*Dev* - collects `symbolic_logq` for all groups""" + """*Dev* - collects `symbolic_logq` for all groups.""" return pt.add(*self.collect("symbolic_logq")) @node_property def logq(self): - """*Dev* - collects `logQ` for all groups""" + """*Dev* - collects `logQ` for all groups.""" return pt.add(*self.collect("logq")) @node_property def logq_norm(self): - """*Dev* - collects `logQ` for all groups and normalizes it""" + """*Dev* - collects `logQ` for all groups and normalizes it.""" return self.logq / self.symbolic_normalizing_constant @node_property def _sized_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" varlogp_s, datalogp_s = self.symbolic_sample_over_posterior( [self.model.varlogp, self.model.datalogp] ) @@ -1288,86 +1288,86 @@ def _sized_symbolic_varlogp_and_datalogp(self): @node_property def sized_symbolic_varlogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" return self._sized_symbolic_varlogp_and_datalogp[0] # shape (s,) @node_property def sized_symbolic_datalogp(self): - """*Dev* - computes sampled data term from model via `pytensor.scan`""" + """*Dev* - computes sampled data term from model via `pytensor.scan`.""" return self._sized_symbolic_varlogp_and_datalogp[1] # shape (s,) @node_property def sized_symbolic_logp(self): - """*Dev* - computes sampled logP from model via `pytensor.scan`""" + """*Dev* - computes sampled logP from model via `pytensor.scan`.""" return self.sized_symbolic_varlogp + self.sized_symbolic_datalogp # shape (s,) @node_property def logp(self): - """*Dev* - computes :math:`E_{q}(logP)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(logP)` from model via `pytensor.scan` that can be optimized later.""" return self.varlogp + self.datalogp @node_property def varlogp(self): - """*Dev* - computes :math:`E_{q}(prior term)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(prior term)` from model via `pytensor.scan` that can be optimized later.""" return self.sized_symbolic_varlogp.mean(0) @node_property def datalogp(self): - """*Dev* - computes :math:`E_{q}(data term)` from model via `pytensor.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(data term)` from model via `pytensor.scan` that can be optimized later.""" return self.sized_symbolic_datalogp.mean(0) @node_property def _single_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `pytensor.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`.""" varlogp, datalogp = self.symbolic_single_sample([self.model.varlogp, self.model.datalogp]) return varlogp, datalogp @node_property def single_symbolic_varlogp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` - is not needed and code can be optimized + is not needed and code can be optimized. 
""" return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` - is not needed and code can be optimized + is not needed and code can be optimized. """ return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` - is not needed and code can be optimized + is not needed and code can be optimized. """ return self.single_symbolic_datalogp + self.single_symbolic_varlogp @node_property def logp_norm(self): - """*Dev* - normalized :math:`E_{q}(logP)`""" + """*Dev* - normalized :math:`E_{q}(logP)`.""" return self.logp / self.symbolic_normalizing_constant @node_property def varlogp_norm(self): - """*Dev* - normalized :math:`E_{q}(prior term)`""" + """*Dev* - normalized :math:`E_{q}(prior term)`.""" return self.varlogp / self.symbolic_normalizing_constant @node_property def datalogp_norm(self): - """*Dev* - normalized :math:`E_{q}(data term)`""" + """*Dev* - normalized :math:`E_{q}(data term)`.""" return self.datalogp / self.symbolic_normalizing_constant @property def replacements(self): - """*Dev* - all replacements from groups to replace PyMC random variables with approximation""" + """*Dev* - all replacements from groups to replace PyMC random variables with approximation.""" return collections.OrderedDict( itertools.chain.from_iterable(g.replacements.items() for g in self.groups) ) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag + sample size and deterministic flag. Parameters ---------- @@ -1393,7 +1393,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @pytensor.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node + :func:`symbolic_single_sample` new random generator can be allocated and applied to node. Parameters ---------- @@ -1420,14 +1420,14 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): return node def to_flat_input(self, node, more_replacements=None): - """*Dev* - replace vars with flattened view stored in `self.inputs`""" + """*Dev* - replace vars with flattened view stored in `self.inputs`.""" more_replacements = more_replacements or {} node = graph_replace(node, more_replacements, strict=False) return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node, more_replacements=None): """*Dev* - performs sampling of node applying independent samples from posterior each time. - Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. """ node = self.to_flat_input(node) @@ -1443,7 +1443,7 @@ def sample(*post): def symbolic_single_sample(self, node, more_replacements=None): """*Dev* - performs sampling of node applying single sample from posterior. Note that it is done symbolically and this node needs - :func:`set_size_and_deterministic` call with `size=1` + :func:`set_size_and_deterministic` call with `size=1`. 
""" node = self.to_flat_input(node, more_replacements=more_replacements) post = [v[0] for v in self.symbolic_randoms] @@ -1463,7 +1463,7 @@ def get_optimization_replacements(self, s, d): @pytensor.config.change_flags(compute_test_value="off") def sample_node(self, node, size=None, deterministic=False, more_replacements=None): - """Samples given node or nodes over shared posterior + """Samples given node or nodes over shared posterior. Parameters ---------- @@ -1498,7 +1498,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No def rslice(self, name): """*Dev* - vectorized sampling for named random variable without call to `pytensor.scan`. - This node still needs :func:`set_size_and_deterministic` to be evaluated + This node still needs :func:`set_size_and_deterministic` to be evaluated. """ def vars_names(vs): diff --git a/pymc/variational/test_functions.py b/pymc/variational/test_functions.py index 303c6cc0903..26ad0619316 100644 --- a/pymc/variational/test_functions.py +++ b/pymc/variational/test_functions.py @@ -21,8 +21,8 @@ class Kernel(TestFunction): - """ - Dummy base class for kernel SVGD in case we implement more + r""" + Dummy base class for kernel SVGD in case we implement more. .. math:: diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index d919a7e24c6..eec732b41d7 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -136,7 +136,7 @@ def get_or_compute_grads(loss_or_grads, params): - """Helper function returning a list of gradients + """Helper function returning a list of gradients. Parameters ---------- @@ -185,7 +185,7 @@ def _get_call_kwargs(_locals_): def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): - """Stochastic Gradient Descent (SGD) updates + """Stochastic Gradient Descent (SGD) updates. Generates update expressions of the form: @@ -238,7 +238,7 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): def apply_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including momentum + """Returns a modified update dictionary including momentum. Generates update expressions of the form: @@ -285,7 +285,7 @@ def apply_momentum(updates, params=None, momentum=0.9): def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): - """Stochastic Gradient Descent (SGD) updates with momentum + """Stochastic Gradient Descent (SGD) updates with momentum. Generates update expressions of the form: @@ -345,7 +345,7 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): def apply_nesterov_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including Nesterov momentum + """Returns a modified update dictionary including Nesterov momentum. Generates update expressions of the form: @@ -398,7 +398,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): - """Stochastic Gradient Descent (SGD) updates with Nesterov momentum + """Stochastic Gradient Descent (SGD) updates with Nesterov momentum. Generates update expressions of the form: @@ -463,7 +463,7 @@ def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momen def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): - """Adagrad updates + r"""Adagrad updates. Scale learning rates by dividing with the square root of accumulated squared gradients. See [1]_ for further description. 
@@ -541,7 +541,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10): """Returns a function that returns parameter updates. - Instead of accumulated estimate, uses running window + Instead of accumulated estimate, uses running window. Parameters ---------- @@ -585,7 +585,7 @@ def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon=1e-6): - """RMSProp updates + r"""RMSProp updates. Scale learning rates by dividing with the moving average of the root mean squared (RMS) gradients. See [1]_ for further description. @@ -666,7 +666,7 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsilon=1e-6): - r"""Adadelta updates + r"""Adadelta updates. Scale learning rates by the ratio of accumulated gradients to accumulated updates, see [1]_ and notes for further description. @@ -772,7 +772,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil def adam( loss_or_grads=None, params=None, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8 ): - """Adam updates + """Adam updates. Adam updates implemented as in [1]_. @@ -859,7 +859,7 @@ def adam( def adamax( loss_or_grads=None, params=None, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8 ): - """Adamax updates + """Adamax updates. Adamax updates implemented as in [1]_. This is a variant of the Adam algorithm based on the infinity norm. @@ -941,7 +941,7 @@ def adamax( def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): - """Max weight norm constraints and gradient clipping + """Max weight norm constraints and gradient clipping. This takes a TensorVariable and rescales it so that incoming weight norms are below a specified constraint value. Vectors violating the @@ -1016,7 +1016,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False): - """Rescales a list of tensors based on their combined norm + """Rescales a list of tensors based on their combined norm. If the combined norm of the input tensors exceeds the threshold then all tensors are rescaled such that the combined norm is equal to the threshold. diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 6ad6dac6bc6..7d72737b2d9 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -31,7 +31,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -Check requirements-dev.txt has been generated from conda-envs/environment-dev.yml +Check requirements-dev.txt has been generated from conda-envs/environment-dev.yml. This is intended to be used as a pre-commit hook, see `.pre-commit-config.yaml`. You can run it manually with `pre-commit run pip-from-conda --all`. diff --git a/setupegg.py b/setupegg.py index e179aafc064..c263f958458 100755 --- a/setupegg.py +++ b/setupegg.py @@ -13,9 +13,7 @@ # limitations under the License. #!/usr/bin/env python -""" -A setup.py script to use setuptools, which gives egg goodness, etc. 
-""" +"""A setup.py script to use setuptools, which gives egg goodness, etc.""" with open("setup.py") as s: exec(s.read()) From 6aa47a3533f46bf7176a9b6a56a57fd36ca20cda Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Mon, 7 Oct 2024 18:31:26 +0200 Subject: [PATCH 3/9] Add docstrings to all modules (D104) --- benchmarks/benchmarks/__init__.py | 2 ++ pymc/__init__.py | 2 ++ pymc/distributions/__init__.py | 2 ++ pymc/gp/__init__.py | 2 ++ pymc/logprob/__init__.py | 2 ++ pymc/model/__init__.py | 3 +++ pymc/model/transform/__init__.py | 2 ++ pymc/sampling/__init__.py | 2 ++ pymc/smc/__init__.py | 2 ++ pymc/step_methods/__init__.py | 2 ++ pymc/step_methods/hmc/__init__.py | 2 ++ pymc/tuning/__init__.py | 2 ++ pymc/variational/__init__.py | 2 ++ 13 files changed, 27 insertions(+) diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index ae0da7db238..1217c81ed2f 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Benchmarks for PyMC.""" diff --git a/pymc/__init__.py b/pymc/__init__.py index 83d147a3a95..a828b72827f 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -13,6 +13,8 @@ # limitations under the License. +"""PyMC: Bayesian Modeling and Probabilistic Programming in Python.""" + import logging _log = logging.getLogger(__name__) diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index d5b23bfbaf6..4d208835640 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Probability distributions.""" + from pymc.distributions.censored import Censored from pymc.distributions.continuous import ( AsymmetricLaplace, diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py index 633562d7d22..15a49efeb6e 100644 --- a/pymc/gp/__init__.py +++ b/pymc/gp/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Gaussian Processes.""" + from pymc.gp import cov, mean, util from pymc.gp.gp import ( TP, diff --git a/pymc/logprob/__init__.py b/pymc/logprob/__init__.py index bed9ee3a9c8..aaa8b2052d7 100644 --- a/pymc/logprob/__init__.py +++ b/pymc/logprob/__init__.py @@ -34,6 +34,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +"""Conversion of PyMC graphs into logp graphs.""" + from pymc.logprob.basic import ( conditional_logp, icdf, diff --git a/pymc/model/__init__.py b/pymc/model/__init__.py index d6316898adc..4caa7013786 100644 --- a/pymc/model/__init__.py +++ b/pymc/model/__init__.py @@ -11,5 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Model object.""" + from pymc.model.core import * from pymc.model.core import ValueGradFunction diff --git a/pymc/model/transform/__init__.py b/pymc/model/transform/__init__.py index ae0da7db238..008e6f8ff09 100644 --- a/pymc/model/transform/__init__.py +++ b/pymc/model/transform/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +"""Model transforms.""" diff --git a/pymc/sampling/__init__.py b/pymc/sampling/__init__.py index 547250cd58b..bb5206ecc8f 100644 --- a/pymc/sampling/__init__.py +++ b/pymc/sampling/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""MCMC samplers.""" + from pymc.sampling.deterministic import compute_deterministics from pymc.sampling.forward import * from pymc.sampling.mcmc import * diff --git a/pymc/smc/__init__.py b/pymc/smc/__init__.py index 4608b39ce75..4d6f90eab31 100644 --- a/pymc/smc/__init__.py +++ b/pymc/smc/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Sequential Monte Carlo samplers.""" + from pymc.smc.kernels import IMH, MH from pymc.smc.sampling import sample_smc diff --git a/pymc/step_methods/__init__.py b/pymc/step_methods/__init__.py index 5f44acc728c..47fabc10ddd 100644 --- a/pymc/step_methods/__init__.py +++ b/pymc/step_methods/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Step methods.""" + from pymc.step_methods.compound import BlockedStep, CompoundStep from pymc.step_methods.hmc import NUTS, HamiltonianMC from pymc.step_methods.metropolis import ( diff --git a/pymc/step_methods/hmc/__init__.py b/pymc/step_methods/hmc/__init__.py index c6f0d2b8b9d..8ec9f91ace4 100644 --- a/pymc/step_methods/hmc/__init__.py +++ b/pymc/step_methods/hmc/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Hamiltonian Monte Carlo.""" + from pymc.step_methods.hmc.hmc import HamiltonianMC from pymc.step_methods.hmc.nuts import NUTS diff --git a/pymc/tuning/__init__.py b/pymc/tuning/__init__.py index a00dd3feed6..f2920849b92 100644 --- a/pymc/tuning/__init__.py +++ b/pymc/tuning/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Tuning phase.""" + from pymc.tuning.scaling import find_hessian, guess_scaling, trace_cov from pymc.tuning.starting import find_MAP diff --git a/pymc/variational/__init__.py b/pymc/variational/__init__.py index 0ba558f58fa..785fb11cb08 100644 --- a/pymc/variational/__init__.py +++ b/pymc/variational/__init__.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+"""Variational Monte Carlo.""" + # commonly used from pymc.variational import ( approximations, From 2a05955138052a69816c986985b79ac77eef4b17 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Mon, 7 Oct 2024 20:29:41 +0200 Subject: [PATCH 4/9] Add a docstring to all magic methods (D105) --- pymc/backends/base.py | 7 +++++++ pymc/backends/mcbackend.py | 1 + pymc/backends/ndarray.py | 1 + pymc/data.py | 4 ++++ pymc/distributions/truncated.py | 1 + pymc/gp/gp.py | 4 ++++ pymc/gp/hsgp_approx.py | 1 + pymc/logprob/abstract.py | 1 + pymc/logprob/scan.py | 1 + pymc/logprob/transforms.py | 1 + pymc/logprob/utils.py | 1 + pymc/model/core.py | 10 ++++++++-- pymc/sampling/parallel.py | 5 +++++ pymc/testing.py | 3 +++ pymc/util.py | 11 +++++++++++ pymc/variational/callbacks.py | 1 + pymc/variational/opvi.py | 4 ++++ 17 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pymc/backends/base.py b/pymc/backends/base.py index 09fc59f7850..912aed842cf 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -55,6 +55,7 @@ class IBaseTrace(ABC, Sized): """Sampler stats for each sampler.""" def __len__(self): + """Length of the chain.""" raise NotImplementedError() def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray: @@ -208,6 +209,7 @@ def setup(self, draws, chain, sampler_vars=None) -> None: # Selection methods def __getitem__(self, idx): + """Get the sample at index `idx`.""" if isinstance(idx, slice): return self._slice(idx) @@ -339,6 +341,7 @@ def __init__(self, straces: Sequence[IBaseTrace]): self._report = SamplerReport() def __repr__(self): + """Return a string representation of MultiTrace.""" template = "<{}: {} chains, {} iterations, {} variables>" return template.format(self.__class__.__name__, self.nchains, len(self), len(self.varnames)) @@ -355,9 +358,11 @@ def report(self) -> SamplerReport: return self._report def __iter__(self): + """Return an iterator of the MultiTrace.""" raise NotImplementedError def __getitem__(self, idx): + """Get the sample at index `idx`.""" if isinstance(idx, slice): return self._slice(idx) @@ -393,6 +398,7 @@ def __getitem__(self, idx): _attrs = {"_straces", "varnames", "chains", "stat_names", "_report"} def __getattr__(self, name): + """Get the value of the attribute of name `name`.""" # Avoid infinite recursion when called before __init__ # variables are set up (e.g., when pickling). if name in self._attrs: @@ -412,6 +418,7 @@ def __getattr__(self, name): raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") def __len__(self): + """Length of the chains.""" chain = self.chains[-1] return len(self._straces[chain]) diff --git a/pymc/backends/mcbackend.py b/pymc/backends/mcbackend.py index 3e6dd8ba812..268ae712a17 100644 --- a/pymc/backends/mcbackend.py +++ b/pymc/backends/mcbackend.py @@ -124,6 +124,7 @@ def record(self, draw: Mapping[str, np.ndarray], stats: Sequence[Mapping[str, An return self._chain.append(value_dict, stats_dict) def __len__(self): + """Length of the chain.""" return len(self._chain) def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray: diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index 7997bf1e97f..016a70d7d3a 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -138,6 +138,7 @@ def close(self): # Selection methods def __len__(self): + """Length of the chain.""" if not self.samples: # `setup` has not been called. 
return 0 return self.draw_idx diff --git a/pymc/data.py b/pymc/data.py index 0e92c9e809e..0f3c2fb27ca 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -108,6 +108,7 @@ def __init__(self, generator): # python3 generator def __next__(self): + """Next value in the generator.""" if not self._yielded_test_value: self._yielded_test_value = True return self.test_value @@ -118,12 +119,15 @@ def __next__(self): next = __next__ def __iter__(self): + """Return an iterator.""" return self def __eq__(self, other): + """Return true if both objects are actually the same.""" return id(self) == id(other) def __hash__(self): + """Return a hash of the object.""" return hash(id(self)) diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 051a57798e0..8fd380dfbba 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -239,6 +239,7 @@ def __init__(self, msg=""): super().__init__(TruncationError, msg) def __str__(self): + """Return a string representation of the object.""" return f"TruncationCheck{{{self.msg}}}" diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 131b00c542f..b8d95321ea0 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -334,6 +334,7 @@ def __init__(self, *, mean_func=Zero(), scale_func=Constant(0.0), cov_func=None, super().__init__(mean_func=mean_func, cov_func=scale_func) def __add__(self, other): + """Add two Student's T processes.""" raise TypeError("Student's T processes aren't additive") def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): @@ -773,6 +774,7 @@ def __init__(self, approx="VFE", *, mean_func=Zero(), cov_func=Constant(0.0)): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" new_gp = super().__add__(other) if not self.approx == other.approx: raise TypeError("Cannot add GPs with different approximations") @@ -1004,6 +1006,7 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_prior(self, name, Xs, jitter, **kwargs): @@ -1165,6 +1168,7 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two Gaussian processes.""" raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_marginal_likelihood(self, Xs): diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index b0b08045900..a023da10bc7 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -310,6 +310,7 @@ def __init__( super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): + """Add two HSGPs.""" raise NotImplementedError("Additive HSGPs aren't supported.") @property diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index 927cd50cf2d..9fac05651df 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -164,6 +164,7 @@ def __init__(self, scalar_op, *args, **kwargs): super().__init__(scalar_op, *args, **kwargs) def __str__(self): + """Return a string representation of the object.""" return f"Measurable{super().__str__()}" diff --git a/pymc/logprob/scan.py b/pymc/logprob/scan.py index c295bec0536..ecd04b9c790 100644 --- a/pymc/logprob/scan.py +++ b/pymc/logprob/scan.py @@ -68,6 +68,7 @@ class MeasurableScan(MeasurableOp, Scan): """A 
placeholder used to specify a log-likelihood for a scan sub-graph.""" def __str__(self): + """Return a string representation of the object.""" return f"Measurable{super().__str__()}" diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index 4ae78fc7809..c7d1bde7706 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -154,6 +154,7 @@ def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: return pt.log(pt.abs(pt.nlinalg.det(pt.atleast_2d(jacobian(phi_inv, [value])[0])))) def __str__(self): + """Return a string representation of the object.""" return f"{self.__class__.__name__}" diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index 93353a86c31..e96426fbe8c 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -215,6 +215,7 @@ def __init__(self, msg: str = "", can_be_replaced_by_ninf: bool = False): self.can_be_replaced_by_ninf = can_be_replaced_by_ninf def __str__(self): + """Return a string representation of the object.""" return f"Check{{{self.msg}}}" diff --git a/pymc/model/core.py b/pymc/model/core.py index 81fc4fa13c6..fef1f7e6c9a 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -504,9 +504,11 @@ class Model(WithMemoization, metaclass=ContextMeta): if TYPE_CHECKING: - def __enter__(self: Self) -> Self: ... + def __enter__(self: Self) -> Self: + """Enter the context manager.""" - def __exit__(self, exc_type: None, exc_val: None, exc_tb: None) -> None: ... + def __exit__(self, exc_type: None, exc_val: None, exc_tb: None) -> None: + """Exit the context manager.""" def __new__(cls, *args, model: Union[Literal[UNSET], None, "Model"] = UNSET, **kwargs): # resolves the parent instance @@ -1571,6 +1573,7 @@ def name_of(self, name): return name def __getitem__(self, key): + """Get the variable named `key`.""" try: return self.named_vars[key] except KeyError as e: @@ -1580,12 +1583,15 @@ def __getitem__(self, key): raise e def __contains__(self, key): + """Check if the model contains a variable named `key`.""" return key in self.named_vars or self.name_for(key) in self.named_vars def __copy__(self): + """Clone the model.""" return self.copy() def __deepcopy__(self, _): + """Clone the model.""" return self.copy() def copy(self): diff --git a/pymc/sampling/parallel.py b/pymc/sampling/parallel.py index 4b76e53a977..191111101c8 100644 --- a/pymc/sampling/parallel.py +++ b/pymc/sampling/parallel.py @@ -50,6 +50,7 @@ def __init__(self, tb): self.tb = tb def __str__(self): + """Return a string representation of the object.""" return self.tb @@ -61,6 +62,7 @@ def __init__(self, exc, tb): self.tb = f'\n"""\n{tb}"""' def __reduce__(self): + """Return a tuple to pickle.""" return rebuild_exc, (self.exc, self.tb) @@ -460,6 +462,7 @@ def _make_active(self): self._active.append(proc) def __iter__(self): + """Return an iterator over draws.""" if not self._in_context: raise ValueError("Use ParallelSampler as context manager.") self._make_active() @@ -504,10 +507,12 @@ def __iter__(self): yield Draw(proc.chain, is_last, draw, tuning, stats, point) def __enter__(self): + """Enter the context manager.""" self._in_context = True return self def __exit__(self, *args): + """Exit the context manager.""" ProcessAdapter.terminate_all(self._samplers) diff --git a/pymc/testing.py b/pymc/testing.py index b33e1cef2b5..d6cc495665a 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -114,6 +114,7 @@ def __init__(self, vals, dtype=pytensor.config.floatX, edges=None, shape=None): self.dtype = dtype def __add__(self, other): + """Add two 
domains.""" return Domain( [v + other for v in self.vals], self.dtype, @@ -122,6 +123,7 @@ def __add__(self, other): ) def __mul__(self, other): + """Multiply two domains.""" try: return Domain( [v * other for v in self.vals], @@ -138,6 +140,7 @@ def __mul__(self, other): ) def __neg__(self): + """Negate one domain.""" return Domain([-v for v in self.vals], self.dtype, (-self.lower, -self.upper), self.shape) diff --git a/pymc/util.py b/pymc/util.py index c05bbcde5eb..22dd499051c 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -113,6 +113,7 @@ def tree_contains(self, item): return list.__contains__(self, item) def __setitem__(self, key, value): + """Set value at index `key` with value `value`.""" raise NotImplementedError( "Method is removed as we are not able to determine appropriate logic for it" ) @@ -121,9 +122,11 @@ def __setitem__(self, key, value): # This is my best guess about what this should do. I might be happier # to kill both of these if they are not used. def __mul__(self, other) -> "treelist": + """Multiplication.""" return cast("treelist", super().__mul__(other)) def __imul__(self, other) -> "treelist": + """Inplace multiplication.""" t0 = len(self) super().__imul__(other) if self.parent is not None: @@ -334,25 +337,31 @@ def __init__(self, obj): self.obj = obj def __hash__(self): + """Return a hash of the object.""" return hashable(self.obj) def __eq__(self, other): + """Compare this object with `other`.""" return self.obj == other def __repr__(self): + """Return a string representation of the object.""" return f"{type(self).__name__}({self.obj})" class WithMemoization: def __hash__(self): + """Return a hash of the object.""" return hash(id(self)) def __getstate__(self): + """Return an object to pickle.""" state = self.__dict__.copy() state.pop("_cache", None) return state def __setstate__(self, state): + """Set the object from a pickled object.""" self.__dict__.update(state) @@ -543,11 +552,13 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def __enter__(self): + """Enter the context manager.""" if self.is_enabled: self.start() return self def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the context manager.""" if self.is_enabled: super().__exit__(exc_type, exc_val, exc_tb) diff --git a/pymc/variational/callbacks.py b/pymc/variational/callbacks.py index 5e3393fb690..2fe4aa7f1d2 100644 --- a/pymc/variational/callbacks.py +++ b/pymc/variational/callbacks.py @@ -149,6 +149,7 @@ def clear(self): self.hist = collections.defaultdict(list) def __getitem__(self, item): + """Get the element at index `item`.""" return self.hist[item] __call__ = record diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 03432e7cc58..91d1f23a09d 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -509,6 +509,7 @@ def __call__(self, f=None): return self.objective_class(self, f) def __str__(self): # pragma: no cover + """Return a string representation of the object.""" return f"{self.__class__.__name__}[{self.approx.__class__.__name__}]" @@ -836,6 +837,7 @@ def _input_type(self, name): @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): + """Initialize the group.""" if not group: raise GroupError("Got empty group") if self.group is None: @@ -1119,6 +1121,7 @@ def logq_norm(self): return self.logq / self.symbolic_normalizing_constant def __str__(self): + """Return a string representation for the object.""" if self.group is None: shp = "undefined" else: @@ -1591,6 +1594,7 @@ def 
symbolic_random(self): return pt.concatenate(self.collect("symbolic_random"), axis=-1) def __str__(self): + """Return a string representation of the object.""" if len(self.groups) < 5: return "Approximation{" + " & ".join(map(str, self.groups)) + "}" else: From 1526d214c993814132752fee819e4a634d8299e3 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Mon, 7 Oct 2024 20:42:31 +0200 Subject: [PATCH 5/9] Remove file encoding statements --- pymc/distributions/continuous.py | 1 - pymc/distributions/multivariate.py | 3 --- pymc/distributions/shape_utils.py | 1 - 3 files changed, 5 deletions(-) diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 8b15f1aa60b..9ec2daa4df8 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -14,7 +14,6 @@ # Contains code from AePPL, Copyright (c) 2021-2022, Aesara Developers. -# coding: utf-8 """ A collection of common probability distributions for stochastic nodes in PyMC. diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 4438c6ff550..951c08367fb 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -#!/usr/bin/env python -# -*- coding: utf-8 -*- - import warnings from functools import partial, reduce diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 9f3219a2f1c..165f26f3bca 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# -*- coding: utf-8 -*- """ A collection of common shape operations needed for broadcasting samples from probability distributions for stochastic nodes in PyMC. 
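The Approximation docstrings reworded earlier in this series (logp/logq normalization, sample_node, and the optimizer callables from pymc.variational.updates) fit together in user code roughly as sketched below. The toy Gaussian model and the settings are illustrative assumptions, not part of the patch series:

import numpy as np
import pymc as pm
from pymc.variational.updates import adam

data = np.random.default_rng(0).normal(loc=1.0, scale=2.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("y", mu, sigma, observed=data)
    # `obj_optimizer` accepts the update callables documented above
    # (sgd, adam, adagrad_window, ...), called with hyperparameters only.
    approx = pm.fit(n=5000, method="advi", obj_optimizer=adam(learning_rate=0.05))

# `sample_node` draws a node over the shared posterior symbolically;
# the result is still a tensor that has to be compiled or evaluated.
mu_draws = approx.sample_node(mu, size=1000).eval()
print(mu_draws.mean(), mu_draws.std())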
From c40e41a17ac8c4a9714dcfc5e4fcafb577abc05f Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Tue, 8 Oct 2024 08:07:41 +0200 Subject: [PATCH 6/9] Write docstrings in imperative mood (D401) --- pymc/backends/__init__.py | 4 ++-- pymc/backends/mcbackend.py | 4 ++-- pymc/data.py | 6 +++--- pymc/distributions/dist_math.py | 6 +++--- pymc/distributions/distribution.py | 9 ++++----- pymc/distributions/mixture.py | 6 +++--- pymc/distributions/multivariate.py | 5 +++-- pymc/distributions/shape_utils.py | 10 +++++++--- pymc/gp/cov.py | 14 +++++++------- pymc/gp/gp.py | 26 +++++++++++++------------- pymc/gp/hsgp_approx.py | 11 +++++------ pymc/gp/util.py | 6 +++--- pymc/initial_point.py | 5 +++-- pymc/logprob/abstract.py | 6 +++--- pymc/logprob/checks.py | 4 ++-- pymc/logprob/cumsum.py | 2 +- pymc/logprob/tensor.py | 4 ++-- pymc/math.py | 4 ++-- pymc/model/core.py | 18 +++++++++--------- pymc/model_graph.py | 10 +++++----- pymc/ode/ode.py | 4 +++- pymc/ode/utils.py | 4 +++- pymc/pytensorf.py | 9 +++++---- pymc/sampling/mcmc.py | 15 ++++++++------- pymc/sampling/population.py | 4 ++-- pymc/smc/kernels.py | 2 +- pymc/stats/convergence.py | 6 +++--- pymc/step_methods/compound.py | 4 ++-- pymc/step_methods/metropolis.py | 4 ++-- pymc/testing.py | 11 ++++++----- pymc/tuning/scaling.py | 6 +++--- pymc/tuning/starting.py | 2 +- pymc/util.py | 8 ++++---- pymc/variational/approximations.py | 2 +- pymc/variational/minibatch_rv.py | 2 +- pymc/variational/opvi.py | 15 +++++++-------- pymc/variational/updates.py | 12 ++++++------ scripts/check_all_tests_are_covered.py | 2 +- scripts/run_mypy.py | 2 +- 39 files changed, 142 insertions(+), 132 deletions(-) diff --git a/pymc/backends/__init__.py b/pymc/backends/__init__.py index 8ef9ed3e8a5..fc1356b8503 100644 --- a/pymc/backends/__init__.py +++ b/pymc/backends/__init__.py @@ -101,7 +101,7 @@ def _init_trace( model: Model, trace_vars: list[TensorVariable] | None = None, ) -> BaseTrace: - """Initializes a trace backend for a chain.""" + """Initialize a trace backend for a chain.""" strace: BaseTrace if trace is None: strace = NDArray(model=model, vars=trace_vars) @@ -126,7 +126,7 @@ def init_traces( model: Model, trace_vars: list[TensorVariable] | None = None, ) -> tuple[RunType | None, Sequence[IBaseTrace]]: - """Initializes a trace recorder for each chain.""" + """Initialize a trace recorder for each chain.""" if HAS_MCB and isinstance(backend, Backend): return init_chain_adapters( backend=backend, diff --git a/pymc/backends/mcbackend.py b/pymc/backends/mcbackend.py index 268ae712a17..3d2c8fd9e7e 100644 --- a/pymc/backends/mcbackend.py +++ b/pymc/backends/mcbackend.py @@ -43,7 +43,7 @@ def find_data(pmodel: Model) -> list[mcb.DataVariable]: - """Extracts data variables from a model.""" + """Extract data variables from a model.""" observed_rvs = {pmodel.rvs_to_values[rv] for rv in pmodel.observed_RVs} dvars = [] # All data containers are named vars! 
@@ -131,7 +131,7 @@ def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray: return self._chain.get_draws(varname, slice(burn, None, thin)) def _get_stats(self, fname: str, slc: slice) -> np.ndarray: - """Wraps `self._chain.get_stats` but unpickles automatically.""" + """Wrap `self._chain.get_stats` but unpickle automatically.""" values = self._chain.get_stats(fname, slc) # Unpickle object stats if fname in self._statsbj.object_stats: diff --git a/pymc/data.py b/pymc/data.py index 0f3c2fb27ca..717a3e442ad 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -53,7 +53,7 @@ def get_data(filename): - """Returns a BytesIO object for a package data file. + """Return a BytesIO object for a package data file. Parameters ---------- @@ -224,7 +224,7 @@ def determine_coords( dims: Sequence[str | None] | None = None, coords: dict[str, Sequence | np.ndarray] | None = None, ) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None]]: - """Determines coordinate values from data or the model (via ``dims``).""" + """Determine coordinate values from data or the model (via ``dims``).""" if coords is None: coords = {} @@ -343,7 +343,7 @@ def Data( mutable: bool | None = None, **kwargs, ) -> SharedVariable | TensorConstant: - """Data container that registers a data variable with the model. + """Create a data container that registers a data variable with the model. Depending on the ``mutable`` setting (default: True), the variable is registered as a :class:`~pytensor.compile.sharedvalue.SharedVariable`, diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index b730f39bf05..a93a9a52a67 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -90,7 +90,7 @@ def check_icdf_value(expr: Variable, value: Variable) -> Variable: def logpow(x, m): - """Calculates log(x**m) since m*log(x) will fail when m, x = 0.""" + """Calculate log(x**m) since m*log(x) will fail when m, x = 0.""" # return m * log(x) return pt.switch(pt.eq(x, 0), pt.switch(pt.eq(m, 0), 0.0, -np.inf), m * pt.log(x)) @@ -108,7 +108,7 @@ def betaln(x, y): def std_cdf(x): - """Calculates the standard normal cumulative distribution function.""" + """Calculate the standard normal cumulative distribution function.""" return 0.5 + 0.5 * pt.erf(x / pt.sqrt(2.0)) @@ -400,7 +400,7 @@ def multigammaln(a, p): def log_i0(x): - """Calculates the logarithm of the 0 order modified Bessel function of the first kind.""" + """Calculate the logarithm of the 0 order modified Bessel function of the first kind.""" return pt.switch( pt.lt(x, 5), pt.log1p( diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 73532f4faec..442f8ba1180 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -392,7 +392,7 @@ def update(self, node: Apply) -> dict[Variable, Variable]: return collect_default_updates_inner_fgraph(node) def batch_ndim(self, node: Apply) -> int: - """Number of dimensions of the distribution's batch shape.""" + """Return the number of dimensions of the distribution's batch shape.""" out_ndim = max(getattr(out.type, "ndim", 0) for out in node.outputs) return out_ndim - self.ndim_supp @@ -438,7 +438,7 @@ def __new__( default_transform=UNSET, **kwargs, ) -> TensorVariable: - """Adds a tensor variable corresponding to a PyMC distribution to the current model. + """Add a tensor variable corresponding to a PyMC distribution to the current model. 
Note that all remaining kwargs must be compatible with ``.dist()`` @@ -531,7 +531,7 @@ def dist( shape: Shape | None = None, **kwargs, ) -> TensorVariable: - """Creates a tensor variable corresponding to the `cls` distribution. + """Create a tensor variable corresponding to the `cls` distribution. Parameters ---------- @@ -604,8 +604,7 @@ def _support_point(op, rv, *rv_inputs) -> TensorVariable: def support_point(rv: TensorVariable) -> TensorVariable: - """Method for choosing a representative point/value - that can be used to start optimization or MCMC sampling. + """Choose a representative point/value that can be used to start optimization or MCMC sampling. The only parameter to this function is the RandomVariable for which the value is to be derived. diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index cc819e87e21..1bcf42d7662 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -555,9 +555,9 @@ def dist(cls, w, mu, sigma=None, tau=None, **kwargs): def _zero_inflated_mixture(*, name, nonzero_p, nonzero_dist, **kwargs): - """Helper function to create a zero-inflated mixture. + """Create a zero-inflated mixture (helper function). - If name is `None`, this function returns an unregistered variable + If name is `None`, this function returns an unregistered variable. """ nonzero_p = pt.as_tensor_variable(nonzero_p) weights = pt.stack([1 - nonzero_p, nonzero_p], axis=-1) @@ -799,7 +799,7 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs): def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, max_n_steps=10_000, **kwargs): - """Helper function to create a hurdle mixtures. + """Create a hurdle mixtures (helper function). If name is `None`, this function returns an unregistered variable diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 951c08367fb..dfada28845f 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -342,7 +342,7 @@ def precision_mv_normal_logp(op: PrecisionMvNormalRV, value, rng, size, mean, ta @node_rewriter(tracks=[MvNormalRV]) def mv_normal_to_precision_mv_normal(fgraph, node): - """Replaces MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau). + """Replace MvNormal(mu, inv(tau)) -> PrecisionMvNormal(mu, tau). This is introduced in logprob rewrites to provide a more efficient logp for a MvNormal that is defined by a precision matrix. @@ -2153,7 +2153,8 @@ def make_node(self, rng, size, mu, W, alpha, tau, W_is_valid): @classmethod def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size): - """ + """Sample a numeric random variate. + Implementation of algorithm from paper Havard Rue, 2001. "Fast sampling of Gaussian Markov random fields," Journal of the Royal Statistical Society Series B, Royal Statistical Society, diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 165f26f3bca..10b84bb7a8d 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -149,7 +149,7 @@ def convert_size(size: Size) -> StrongSize | None: def shape_from_dims(dims: StrongDims, model) -> StrongShape: - """Determines shape from a `dims` tuple. + """Determine shape from a `dims` tuple. Parameters ---------- @@ -178,7 +178,7 @@ def find_size( size: StrongSize | None, ndim_supp: int, ) -> StrongSize | None: - """Determines the size keyword argument for creating a Distribution. + """Determine the size keyword argument for creating a Distribution. 
Parameters ---------- @@ -436,7 +436,11 @@ def get_support_shape_1d( observed: Any | None = None, support_shape_offset: int = 0, ) -> TensorVariable | None: - """Helper function for cases when you just care about one dimension.""" + """ + Extract the support shapes from shape / dims / observed information. + + Helper function for cases when you just care about one dimension. + """ support_shape_tuple = get_support_shape( support_shape=(support_shape,) if support_shape is not None else None, shape=shape, diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 9d3017b163e..e5b0f8cc61b 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -114,7 +114,7 @@ def __pow__(self, other) -> "Exponentiated": return Exponentiated(self, other) def __array_wrap__(self, result): - """Required to allow radd/rmul by numpy arrays.""" + """Allow radd/rmul by numpy arrays.""" result = np.squeeze(result) if len(result.shape) <= 1: result = result.reshape(1, 1) @@ -234,7 +234,7 @@ def __init__(self, factor_list: Sequence): self._factor_list.append(factor) def _merge_factors_cov(self, X, Xs=None, diag=False): - """Called to evaluate either all the sums or all the + """Evaluate either all the sums or all the products of kernels that are possible to evaluate. """ factor_list = [] @@ -264,12 +264,12 @@ def _merge_factors_cov(self, X, Xs=None, diag=False): return factor_list def _merge_factors_psd(self, omega): - """Called to evaluatate spectral densities of combination kernels when possible. + """Evaluate spectral densities of combination kernels when possible. - Implements - a more restricted set of rules than `_merge_factors_cov` -- just additivity of stationary - covariances with defined power spectral densities and multiplication by scalars. Also, the - active_dims for all covariances in the sum must be the same. + Implements a more restricted set of rules than `_merge_factors_cov` -- + just additivity of stationary covariances with defined power spectral + densities and multiplication by scalars. Also, the active_dims for all + covariances in the sum must be the same. """ factor_list = [] for factor in self._factor_list: diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index b8d95321ea0..3445d355377 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -47,7 +47,7 @@ def _handle_sigma_noise_parameters(sigma, noise): - """Helper function for transition of 'noise' parameter to be named 'sigma'.""" + """Help transition of 'noise' parameter to be named 'sigma'.""" if (sigma is None and noise is None) or (sigma is not None and noise is not None): raise ValueError("'sigma' argument must be specified.") @@ -177,7 +177,7 @@ def _build_prior( def prior(self, name, X, n_outputs=1, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the GP prior distribution evaluated over the input + Return the GP prior distribution evaluated over the input locations `X`. This is the prior probability over the space @@ -250,7 +250,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the conditional distribution evaluated over new input + Return the conditional distribution evaluated over new input locations `Xnew`. 
Given a set of function values `f` that @@ -350,7 +350,7 @@ def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kw def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the TP prior distribution evaluated over the input + Return the TP prior distribution evaluated over the input locations `X`. This is the prior probability over the space @@ -394,7 +394,7 @@ def _build_conditional(self, Xnew, X, f, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): R""" - Returns the conditional distribution evaluated over new input + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that @@ -487,7 +487,7 @@ def marginal_likelihood( **kwargs, ): R""" - Returns the marginal likelihood distribution, given the input + Return the marginal likelihood distribution, given the input locations `X` and the data `y`. This is the integral over the product of the GP prior and a normal likelihood. @@ -594,7 +594,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the conditional distribution evaluated over new input + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the GP prior was over, the @@ -814,7 +814,7 @@ def marginal_likelihood( self, name, X, Xu, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the approximate marginal likelihood distribution, given the input + Return the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise standard deviations `sigma`. @@ -906,7 +906,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Returns the approximate conditional distribution of the GP evaluated over + Return the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. Parameters @@ -1019,7 +1019,7 @@ def _build_prior(self, name, Xs, jitter, **kwargs): def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ - Returns the prior distribution evaluated over the input + Return the prior distribution evaluated over the input locations `Xs`. Parameters @@ -1065,7 +1065,7 @@ def _build_conditional(self, Xnew, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): """ - Returns the conditional distribution evaluated over new input + Return the conditional distribution evaluated over new input locations `Xnew`. `Xnew` will be split by columns and fed to the relevant @@ -1188,7 +1188,7 @@ def _check_inputs(self, Xs, y): def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): """ - Returns the marginal likelihood distribution, given the input + Return the marginal likelihood distribution, given the input locations `cartesian(*Xs)` and the data `y`. Parameters @@ -1267,7 +1267,7 @@ def _build_conditional(self, Xnew, diag, pred_noise): def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ - Returns the conditional distribution evaluated over new input + Return the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. 
`Xnew` will be split by columns and fed to the relevant diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index a023da10bc7..abb56adab28 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -93,8 +93,7 @@ def calc_basis_periodic( def approx_hsgp_hyperparams( x_range: list[float], lengthscale_range: list[float], cov_func: str ) -> tuple[int, float]: - """Utility function that uses heuristics to recommend minimum `m` and `c` values, - based on recommendations from Ruitort-Mayol et. al. + """Use heuristics to recommend minimum `m` and `c` values, based on recommendations from Ruitort-Mayol et. al. In practice, you need to choose `c` large enough to handle the largest lengthscales, and `m` large enough to accommodate the smallest lengthscales. Use your prior on the @@ -427,7 +426,7 @@ def prior( **kwargs, ): # type: ignore R""" - Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + Return the (approximate) GP prior distribution evaluated over the input locations `X`. For usage examples, refer to `pm.gp.Latent`. Parameters @@ -490,7 +489,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) conditional distribution evaluated over new input locations + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters @@ -685,7 +684,7 @@ def prior_linearized(self, X: TensorLike): def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + Return the (approximate) GP prior distribution evaluated over the input locations `X`. For usage examples, refer to `pm.gp.Latent`. Parameters @@ -736,7 +735,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Returns the (approximate) conditional distribution evaluated over new input locations + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 53ba91aa9e7..b7d074baa70 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -76,7 +76,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): def stabilize(K, jitter=JITTER_DEFAULT): R""" - Adds small diagonal to a covariance matrix. + Add small diagonal to a covariance matrix. Often the matrices calculated from covariance functions, `K = cov_func(X)` do not appear numerically to be positive semi-definite. Adding a small @@ -131,7 +131,7 @@ def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): def conditioned_vars(varnames): - """Decorator for validating attrs that are conditioned on.""" + """Validate attrs that are conditioned on.""" def gp_wrapper(cls): def make_getter(name): @@ -174,7 +174,7 @@ def plot_gp_dist( fill_kwargs=None, samples_kwargs=None, ): - """A helper function for plotting 1D GP posteriors from trace. + """Plot 1D GP posteriors from trace. 
Parameters ---------- diff --git a/pymc/initial_point.py b/pymc/initial_point.py index b46d13cb55b..4c40d171cc6 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -35,8 +35,9 @@ def convert_str_to_rv_dict( model, start: StartDict ) -> dict[TensorVariable, np.ndarray | Variable | str | None]: - """Helper function for converting a user-provided start dict with str keys of (transformed) variable names + """Convert a user-provided start dict with str keys of (transformed) variable names to a dict mapping the RV tensors to untransformed initvals. + TODO: Deprecate this functionality and only accept TensorVariables as keys. """ initvals = {} @@ -182,7 +183,7 @@ def make_initial_point_expression( default_strategy: str = "support_point", return_transformed: bool = False, ) -> list[TensorVariable]: - """Creates the tensor variables that need to be evaluated to obtain an initial point. + """Create the tensor variables that need to be evaluated to obtain an initial point. Parameters ---------- diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index 9fac05651df..f47c39e2b5a 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -76,7 +76,7 @@ def _logprob( def _logprob_helper(rv, *values, **kwargs): - """Helper that calls `_logprob` dispatcher.""" + """Help call `_logprob` dispatcher.""" logprob = _logprob(rv.owner.op, values, *rv.owner.inputs, **kwargs) name = rv.name @@ -109,7 +109,7 @@ def _logcdf( def _logcdf_helper(rv, value, **kwargs): - """Helper that calls `_logcdf` dispatcher.""" + """Help call `_logcdf` dispatcher.""" logcdf = _logcdf(rv.owner.op, value, *rv.owner.inputs, name=rv.name, **kwargs) if rv.name: @@ -134,7 +134,7 @@ def _icdf( def _icdf_helper(rv, value, **kwargs): - """Helper that calls `_icdf` dispatcher.""" + """Help call `_icdf` dispatcher.""" rv_icdf = _icdf(rv.owner.op, value, *rv.owner.inputs, **kwargs) if rv.name: diff --git a/pymc/logprob/checks.py b/pymc/logprob/checks.py index 59cc7298d7b..c8c21ef61c2 100644 --- a/pymc/logprob/checks.py +++ b/pymc/logprob/checks.py @@ -61,7 +61,7 @@ def logprob_specify_shape(op, values, inner_rv, *shapes, **kwargs): @node_rewriter([SpecifyShape]) def find_measurable_specify_shapes(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `SpecifyShapeOp`\s for which a `logprob` can be computed.""" + r"""Find `SpecifyShapeOp`\s for which a `logprob` can be computed.""" if isinstance(node.op, MeasurableSpecifyShape): return None # pragma: no cover @@ -98,7 +98,7 @@ def logprob_check_and_raise(op, values, inner_rv, *assertions, **kwargs): @node_rewriter([CheckAndRaise]) def find_measurable_check_and_raise(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `AssertOp`\s for which a `logprob` can be computed.""" + r"""Find `AssertOp`\s for which a `logprob` can be computed.""" if isinstance(node.op, MeasurableCheckAndRaise): return None # pragma: no cover diff --git a/pymc/logprob/cumsum.py b/pymc/logprob/cumsum.py index 514500f1b6b..4fd5a6eaeb0 100644 --- a/pymc/logprob/cumsum.py +++ b/pymc/logprob/cumsum.py @@ -76,7 +76,7 @@ def logprob_cumsum(op, values, base_rv, **kwargs): @node_rewriter([CumOp]) def find_measurable_cumsums(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Cumsums`\s for which a `logprob` can be computed.""" + r"""Find `Cumsums`\s for which a `logprob` can be computed.""" if not (isinstance(node.op, CumOp) and node.op.mode == "add"): return None diff --git a/pymc/logprob/tensor.py b/pymc/logprob/tensor.py index 750ace56943..0f4624ca353 100644 --- 
a/pymc/logprob/tensor.py +++ b/pymc/logprob/tensor.py @@ -140,7 +140,7 @@ def logprob_join(op, values, axis, *base_rvs, **kwargs): @node_rewriter([MakeVector, Join]) def find_measurable_stacks(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Joins`\s and `MakeVector`\s for which a `logprob` can be computed.""" + r"""Find `Joins`\s and `MakeVector`\s for which a `logprob` can be computed.""" from pymc.pytensorf import toposort_replace if isinstance(node.op, MeasurableOp): @@ -218,7 +218,7 @@ def logprob_dimshuffle(op: MeasurableDimShuffle, values, base_var, **kwargs): @node_rewriter([DimShuffle]) def find_measurable_dimshuffles(fgraph, node) -> list[TensorVariable] | None: - r"""Finds `Dimshuffle`\s for which a `logprob` can be computed.""" + r"""Find `Dimshuffle`\s for which a `logprob` can be computed.""" from pymc.distributions.distribution import SymbolicRandomVariable if isinstance(node.op, MeasurableOp): diff --git a/pymc/math.py b/pymc/math.py index 9ffe8a6e8fe..705bb8df07e 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -201,7 +201,7 @@ def kronecker(*Ks): def cartesian(*arrays): - """Makes the Cartesian product of arrays. + """Make the Cartesian product of arrays. Parameters ---------- @@ -264,7 +264,7 @@ def flat_outer(a, b): def kron_diag(*diags): - """Returns diagonal of a kronecker product. + """Return diagonal of a kronecker product. Parameters ---------- diff --git a/pymc/model/core.py b/pymc/model/core.py index fef1f7e6c9a..097327f9cfe 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -989,7 +989,7 @@ def add_coord( *, length: int | Variable | None = None, ): - """Registers a dimension coordinate with the model. + """Register a dimension coordinate with the model. Parameters ---------- @@ -1090,7 +1090,7 @@ def set_dim(self, name: str, new_length: int, coord_values: Sequence | None = No return def initial_point(self, random_seed: SeedSequenceSeed = None) -> dict[str, np.ndarray]: - """Computes the initial point of the model. + """Compute the initial point of the model. Parameters ---------- @@ -1106,7 +1106,7 @@ def initial_point(self, random_seed: SeedSequenceSeed = None) -> dict[str, np.nd return Point(fn(random_seed), model=self) def set_initval(self, rv_var, initval): - """Sets an initial value (strategy) for a random variable.""" + """Set an initial value (strategy) for a random variable.""" if initval is not None and not isinstance(initval, Variable | str): # Convert scalars or array-like inputs to ndarrays initval = rv_var.type.filter(initval) @@ -1119,7 +1119,7 @@ def set_data( values: Sequence | np.ndarray, coords: dict[str, Sequence] | None = None, ): - """Changes the values of a data variable in the model. + """Change the values of a data variable in the model. In contrast to pm.Data().set_value, this method can also update the corresponding coordinates. 
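The `Model.set_data` docstring edited just above pairs with the dynamic-shape behaviour of data containers. A small sketch of the intended usage, with made-up variable names and coordinate values:

import numpy as np
import pymc as pm

with pm.Model(coords={"obs": ["a", "b", "c"]}) as m:
    x = pm.Data("x", np.array([1.0, 2.0, 3.0]), dims="obs")

print(m["x"].get_value())   # [1. 2. 3.]
# Updating the container resizes the shared value and, because `coords`
# is given, the "obs" coordinate values as well.
m.set_data("x", np.array([4.0, 5.0]), coords={"obs": ["d", "e"]})
print(m["x"].get_value())   # [4. 5.]
print(m.coords["obs"])      # ('d', 'e')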
@@ -1552,7 +1552,7 @@ def prefix(self) -> str: return name def name_for(self, name): - """Checks if name has prefix and adds if needed.""" + """Check if name has prefix and adds if needed.""" name = self._validate_name(name) if self.prefix: if not name.startswith(self.prefix + "::"): @@ -1563,7 +1563,7 @@ def name_for(self, name): return name def name_of(self, name): - """Checks if name has prefix and deletes if needed.""" + """Check if name has prefix and deletes if needed.""" name = self._validate_name(name) if not self.prefix or not name: return name @@ -1763,7 +1763,7 @@ def update_start_vals(self, a: dict[str, np.ndarray], b: dict[str, np.ndarray]): ) def eval_rv_shapes(self) -> dict[str, tuple[int, ...]]: - """Evaluates shapes of untransformed AND transformed free variables. + """Evaluate shapes of untransformed AND transformed free variables. Returns ------- @@ -1841,7 +1841,7 @@ def check_start_vals(self, start, **kwargs): ) def point_logps(self, point=None, round_vals=2, **kwargs): - """Computes the log probability of `point` for all random variables in the model. + """Compute the log probability of `point` for all random variables in the model. Parameters ---------- @@ -2115,7 +2115,7 @@ def new_or_existing_block_model_access(*args, **kwargs): def set_data(new_data, model=None, *, coords=None): - """Sets the value of one or more data container variables. Note that the shape is also + """Set the value of one or more data container variables. Note that the shape is also dynamic, it is updated when the value is changed. See the examples below for two common use-cases that take advantage of this behavior. diff --git a/pymc/model_graph.py b/pymc/model_graph.py index bacf38b9178..08589a828c6 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -117,7 +117,7 @@ def __eq__(self, other) -> bool: def default_potential(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for potential in the graph.""" + """Return default data for potential in the graph.""" return { "shape": "octagon", "style": "filled", @@ -136,7 +136,7 @@ def random_variable_symbol(var: TensorVariable) -> str: def default_free_rv(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for free RV in the graph.""" + """Return default data for free RV in the graph.""" symbol = random_variable_symbol(var) return { @@ -147,7 +147,7 @@ def default_free_rv(var: TensorVariable) -> GraphvizNodeKwargs: def default_observed_rv(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for observed RV in the graph.""" + """Return default data for observed RV in the graph.""" symbol = random_variable_symbol(var) return { @@ -158,7 +158,7 @@ def default_observed_rv(var: TensorVariable) -> GraphvizNodeKwargs: def default_deterministic(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for the deterministic in the graph.""" + """Return default data for the deterministic in the graph.""" return { "shape": "box", "style": None, @@ -167,7 +167,7 @@ def default_deterministic(var: TensorVariable) -> GraphvizNodeKwargs: def default_data(var: TensorVariable) -> GraphvizNodeKwargs: - """Default data for the data in the graph.""" + """Return default data for the data in the graph.""" return { "shape": "box", "style": "rounded, filled", diff --git a/pymc/ode/ode.py b/pymc/ode/ode.py index 26584d823c5..ca01af13b65 100644 --- a/pymc/ode/ode.py +++ b/pymc/ode/ode.py @@ -108,7 +108,9 @@ def __init__(self, func, times, *, n_states, n_theta, t0=0): self._output_sensitivities = {} def _system(self, Y, t, p): - 
r"""The function that will be passed to odeint. Solves both ODE and sensitivities. + r"""Solve both ODE and sensitivities. + + This function will be passed to odeint. Parameters ---------- diff --git a/pymc/ode/utils.py b/pymc/ode/utils.py index fbe4ba97ab8..3ad05b1e143 100644 --- a/pymc/ode/utils.py +++ b/pymc/ode/utils.py @@ -19,6 +19,8 @@ def make_sens_ic(n_states, n_theta, floatX): r""" + Make initial condition for the sensitivity matrix. + The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta). If the first n_states entries of the parameters vector in the simulate call @@ -58,7 +60,7 @@ def make_sens_ic(n_states, n_theta, floatX): def augment_system(ode_func, n_states, n_theta): """ - Function to create augmented system. + Create augmented system. Take a function which specifies a set of differential equations and return a compiled function which allows for computation of gradients of the diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index c7a73dd85d7..0af861e7282 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -299,7 +299,7 @@ def intX(X): def smartfloatX(x): - """Converts numpy float values to floatX and leaves values of other types unchanged.""" + """Convert numpy float values to floatX and leaves values of other types unchanged.""" if str(x.dtype).startswith("float"): x = floatX(x) return x @@ -440,7 +440,7 @@ def __hash__(self): def make_shared_replacements(point, vars, model): """ - Makes shared replacements for all *other* variables than the ones passed. + Make shared replacements for all *other* variables than the ones passed. This way functions can be called many times without setting unchanging variables. Allows us to use func.trust_input by removing the need for DictToArrayBijection and kwargs. @@ -642,7 +642,7 @@ def __init__(self, tensor): self.tensor = tensor def __call__(self, input): - """Replaces the single input of symbolic variable to be the passed argument. + """Replace the single input of symbolic variable to be the passed argument. Parameters ---------- @@ -723,7 +723,8 @@ def set_default(self, value): def generator(gen, default=None): """ - Generator variable with possibility to set default value and new generator. + Create a generator variable with possibility to set default value and new generator. + If generator is exhausted variable will produce default value if it is not None, else raises `StopIteration` exception that can be caught on runtime. diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index f6e8d9d4cc5..1c124eb7a95 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -908,8 +908,9 @@ def _sample_return( idata_kwargs: dict[str, Any], model: Model, ) -> InferenceData | MultiTrace: - """Final step of `pm.sampler` that picks/slices chains, - runs diagnostics and converts to the desired return type. + """Pick/slice chains, run diagnostics and convert to the desired return type. + + Final step of `pm.sampler`. """ # Pick and slice chains to keep the maximum number of samples if discard_tuned_samples: @@ -967,7 +968,7 @@ def _sample_return( def _check_start_shape(model, start: PointType): - """Checks that the prior evaluations and initial points have identical shapes. + """Check that the prior evaluations and initial points have identical shapes. Parameters ---------- @@ -1002,7 +1003,7 @@ def _sample_many( callback: SamplingIteratorCallback | None = None, **kwargs, ): - """Samples all chains sequentially. + """Sample all chains sequentially. 
Parameters ---------- @@ -1046,7 +1047,7 @@ def _sample( callback=None, **kwargs, ) -> None: - """Main iteration for singleprocess sampling. + """Sample one chain (singleprocess). Multiple step methods are supported via compound step methods. @@ -1126,7 +1127,7 @@ def _iter_sample( model: Model | None = None, callback: SamplingIteratorCallback | None = None, ) -> Iterator[bool]: - """Generator for sampling one chain. (Used in singleprocess sampling.). + """Sample one chain with a generator (singleprocess). Parameters ---------- @@ -1211,7 +1212,7 @@ def _mp_sample( mp_ctx=None, **kwargs, ) -> None: - """Main iteration for multiprocess sampling. + """Sample all chains (multiprocess). Parameters ---------- diff --git a/pymc/sampling/population.py b/pymc/sampling/population.py index c0dc813b5c7..4e5a2299601 100644 --- a/pymc/sampling/population.py +++ b/pymc/sampling/population.py @@ -62,7 +62,7 @@ def _sample_population( traces: Sequence[BaseTrace], **kwargs, ): - """Performs sampling of a population of chains using the ``PopulationStepper``. + """Perform sampling of a population of chains using the ``PopulationStepper``. Parameters ---------- @@ -234,7 +234,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): @staticmethod def _run_secondary(c, stepper_dumps, secondary_end, task, progress): - """The method is started on a separate process to perform stepping of a chain. + """Perform stepping of a chain from a separate process. Parameters ---------- diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 60dcf033a0b..5de78d0cbbd 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -252,7 +252,7 @@ def _initialize_kernel(self): self.likelihood_logp = np.array(likelihoods).squeeze() def setup_kernel(self): - """Setup logic performed once before sampling starts.""" + """Perform setup logic once before sampling starts.""" pass def update_beta_and_weights(self): diff --git a/pymc/stats/convergence.py b/pymc/stats/convergence.py index 47359365ebf..eee6677825c 100644 --- a/pymc/stats/convergence.py +++ b/pymc/stats/convergence.py @@ -131,7 +131,7 @@ def run_convergence_checks(idata: arviz.InferenceData, model) -> list[SamplerWar def warn_divergences(idata: arviz.InferenceData) -> list[SamplerWarning]: - """Checks sampler stats and creates a list of warnings about divergences.""" + """Check sampler stats and creates a list of warnings about divergences.""" sampler_stats = idata.get("sample_stats", None) if sampler_stats is None: return [] @@ -153,7 +153,7 @@ def warn_divergences(idata: arviz.InferenceData) -> list[SamplerWarning]: def warn_treedepth(idata: arviz.InferenceData) -> list[SamplerWarning]: - """Checks sampler stats and creates a list of warnings about tree depth.""" + """Check sampler stats and creates a list of warnings about tree depth.""" sampler_stats = idata.get("sample_stats", None) if sampler_stats is None: return [] @@ -187,7 +187,7 @@ def log_warnings(warnings: Sequence[SamplerWarning]): def log_warning_stats(stats: Sequence[dict[str, Any]]): - """Logs 'warning' stats if present.""" + """Log 'warning' stats if present.""" if stats is None: return diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py index 700c7afa6b4..38effcb134c 100644 --- a/pymc/step_methods/compound.py +++ b/pymc/step_methods/compound.py @@ -59,7 +59,7 @@ def infer_warn_stats_info( sds: dict[str, tuple[StatDtype, StatShape]], stepname: str, ) -> tuple[list[dict[str, StatDtype]], dict[str, tuple[StatDtype, StatShape]]]: - """Helper function to get `stats_dtypes` and 
`stats_dtypes_shapes` from either of them.""" + """Get `stats_dtypes` and `stats_dtypes_shapes` from either of them.""" # Avoid side-effects on the original lists/dicts stats_dtypes = [d.copy() for d in stats_dtypes] sds = sds.copy() @@ -214,7 +214,7 @@ def flat_statname(sampler_idx: int, sname: str) -> str: def get_stats_dtypes_shapes_from_steps( steps: Iterable[BlockedStep], ) -> dict[str, tuple[StatDtype, StatShape]]: - """Combines stats dtype shape dictionaries from multiple step methods. + """Combine stats dtype shape dictionaries from multiple step methods. In the resulting stats dict, each sampler stat is prefixed by `sampler_#__`. """ diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index a9d23d5ceb3..e0101b8fe3a 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -254,7 +254,7 @@ def __init__( super().__init__(vars, shared, rng=rng) def reset_tuning(self): - """Resets the tuned sampler parameters to their initial values.""" + """Reset the tuned sampler parameters to their initial values.""" for attr, initial_value in self._untuned_settings.items(): setattr(self, attr, initial_value) self.accepted_sum[:] = 0 @@ -1076,7 +1076,7 @@ def __init__( super().__init__(vars, shared, rng=rng) def reset_tuning(self): - """Resets the tuned sampler parameters and history to their initial values.""" + """Reset the tuned sampler parameters and history to their initial values.""" # history can't be reset via the _untuned_settings dict because it's a list self._history = [] for attr, initial_value in self._untuned_settings.items(): diff --git a/pymc/testing.py b/pymc/testing.py index d6cc495665a..d4c625edcaf 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -226,7 +226,7 @@ def RandomPdMatrix(n): def select_by_precision(float64, float32): - """Helper function to choose reasonable decimal cutoffs for different floatX modes.""" + """Choose reasonable decimal cutoffs for different floatX modes.""" decimal = float64 if pytensor.config.floatX == "float64" else float32 return decimal @@ -314,10 +314,9 @@ def check_logp( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logp methods. - Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar results for valid values and parameters inside the supported edges. + Edges are excluded by default, but can be artificially included by creating a domain with repeated values (e.g., `Domain([0, 0, .5, 1, 1]`) @@ -424,7 +423,8 @@ def check_logcdf( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Generic test for PyMC logcdf methods. + Test PyMC logcdf and equivalent scipy logcdf methods give similar + results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC logcdf and equivalent scipy logcdf methods give similar @@ -539,7 +539,8 @@ def check_icdf( n_samples: int = 100, ) -> None: """ - Generic test for PyMC icdf methods. + Test PyMC icdf and equivalent scipy icdf methods give similar + results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC icdf and equivalent scipy icdf (ppf) methods give similar diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index 459b3a40aa7..56e1fb33ba5 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -26,7 +26,7 @@ def fixed_hessian(point, model=None): """ - Returns a fixed Hessian for any chain location. 
+ Return a fixed Hessian for any chain location. Parameters ---------- @@ -44,7 +44,7 @@ def fixed_hessian(point, model=None): def find_hessian(point, vars=None, model=None, negate_output=True): """ - Returns Hessian of logp at the point passed. + Return Hessian of logp at the point passed. Parameters ---------- @@ -60,7 +60,7 @@ def find_hessian(point, vars=None, model=None, negate_output=True): def find_hessian_diag(point, vars=None, model=None, negate_output=True): """ - Returns Hessian of logp at the point passed. + Return Hessian of logp at the point passed. Parameters ---------- diff --git a/pymc/tuning/starting.py b/pymc/tuning/starting.py index 5f0bcbb8546..3264010071e 100644 --- a/pymc/tuning/starting.py +++ b/pymc/tuning/starting.py @@ -62,7 +62,7 @@ def find_MAP( seed: int | None = None, **kwargs, ): - """Finds the local maximum a posteriori point given a model. + """Find the local maximum a posteriori point given a model. `find_MAP` should not be used to initialize the NUTS sampler. Simply call ``pymc.sample()`` and it will automatically initialize NUTS in a better diff --git a/pymc/util.py b/pymc/util.py index 22dd499051c..8f952e589cd 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -71,7 +71,7 @@ def __repr__(self): def withparent(meth): - """Helper wrapper that passes calls to parent's instance.""" + """Pass calls to parent's instance.""" def wrapped(self, *args, **kwargs): res = meth(self, *args, **kwargs) @@ -217,7 +217,7 @@ def get_untransformed_name(name): def get_default_varnames(var_iterator, include_transformed): - r"""Helper to extract default varnames from a trace. + r"""Extract default varnames from a trace. Parameters ---------- @@ -267,7 +267,7 @@ def enhanced(*args, **kwargs): def drop_warning_stat(idata: arviz.InferenceData) -> arviz.InferenceData: - """Returns a new ``InferenceData`` object with the "warning" stat removed from sample stats groups. + """Return a new ``InferenceData`` object with the "warning" stat removed from sample stats groups. This function should be applied to an ``InferenceData`` object obtained with ``pm.sample(keep_warning_stat=True)`` before trying to ``.to_netcdf()`` or ``.to_zarr()`` it. @@ -468,7 +468,7 @@ def _get_unique_seeds_per_chain(integers_fn): def get_value_vars_from_user_vars(vars: Variable | Sequence[Variable], model) -> list[Variable]: - """Converts user "vars" input into value variables. + """Convert user "vars" input into value variables. More often than not, users will pass random variables, and we will extract the respective value variables, but we also allow for the input to already be value diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index 1800d9a6aa2..c382763bcb6 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -372,7 +372,7 @@ def __init__(self, trace=None, size=None, **kwargs): def evaluate_over_trace(self, node): R""" - Allows to statically evaluate any symbolic expression over the trace. + Allow to statically evaluate any symbolic expression over the trace. 
Parameters ---------- diff --git a/pymc/variational/minibatch_rv.py b/pymc/variational/minibatch_rv.py index f3ecccf2952..0437f666303 100644 --- a/pymc/variational/minibatch_rv.py +++ b/pymc/variational/minibatch_rv.py @@ -81,7 +81,7 @@ def create_minibatch_rv( def get_scaling(total_size: Sequence[Variable], shape: TensorVariable) -> TensorVariable: - """Gets scaling constant for logp.""" + """Get scaling constant for logp.""" # mypy doesn't understand we can convert a shape TensorVariable into a tuple shape = tuple(shape) # type: ignore diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 91d1f23a09d..9458c593875 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -142,7 +142,7 @@ def inner(*args, **kwargs): def node_property(f): - """A shortcut for wrapping method to accessible tensor.""" + """Wrap method to accessible tensor.""" if isinstance(f, str): def wrapper(fn): @@ -514,8 +514,7 @@ def __str__(self): # pragma: no cover def collect_shared_to_list(params): - """Helper function for getting a list from - usable representation of parameters. + """Get a list from a usable representation of parameters. Parameters ---------- @@ -1130,21 +1129,21 @@ def __str__(self): @node_property def std(self) -> pt.TensorVariable: - """Standard deviation of the latent variables as an unstructured 1-dimensional tensor variable.""" + """Return the standard deviation of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() @node_property def cov(self) -> pt.TensorVariable: - """Covariance between the latent variables as an unstructured 2-dimensional tensor variable.""" + """Return the covariance between the latent variables as an unstructured 2-dimensional tensor variable.""" raise NotImplementedError() @node_property def mean(self) -> pt.TensorVariable: - """Mean of the latent variables as an unstructured 1-dimensional tensor variable.""" + """Return the mean of the latent variables as an unstructured 1-dimensional tensor variable.""" raise NotImplementedError() def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: - """Takes a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in + """Take a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in `self.ordering`. """ # This is somewhat similar to `DictToArrayBijection.rmap`, which doesn't work here since we don't have @@ -1466,7 +1465,7 @@ def get_optimization_replacements(self, s, d): @pytensor.config.change_flags(compute_test_value="off") def sample_node(self, node, size=None, deterministic=False, more_replacements=None): - """Samples given node or nodes over shared posterior. + """Sample given node or nodes over shared posterior. Parameters ---------- diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index eec732b41d7..256db4abca5 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -136,7 +136,7 @@ def get_or_compute_grads(loss_or_grads, params): - """Helper function returning a list of gradients. + """Return a list of gradients. Parameters ---------- @@ -238,9 +238,9 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): def apply_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including momentum. + """Return a modified update dictionary including momentum. 
- Generates update expressions of the form: + Generate update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + velocity`` @@ -345,9 +345,9 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): def apply_nesterov_momentum(updates, params=None, momentum=0.9): - """Returns a modified update dictionary including Nesterov momentum. + """Return a modified update dictionary including Nesterov momentum. - Generates update expressions of the form: + Generate update expressions of the form: * ``velocity := momentum * velocity + updates[param] - param`` * ``param := param + momentum * velocity + updates[param] - param`` @@ -540,7 +540,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10): - """Returns a function that returns parameter updates. + """Return a function that returns parameter updates. Instead of accumulated estimate, uses running window. Parameters diff --git a/scripts/check_all_tests_are_covered.py b/scripts/check_all_tests_are_covered.py index 4d2b1338670..23079338d66 100644 --- a/scripts/check_all_tests_are_covered.py +++ b/scripts/check_all_tests_are_covered.py @@ -31,7 +31,7 @@ def find_testfiles(): def from_yaml(): - """Determines how often each test file is run per platform and floatX setting. + """Determine how often each test file is run per platform and floatX setting. An exception is raised if tests run multiple times with the same configuration. """ diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index f5a0b6d3ac8..7620b025002 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -97,7 +97,7 @@ def mypy_to_pandas(input_lines: Iterator[str]) -> pandas.DataFrame: def check_no_unexpected_results(mypy_lines: Iterator[str]): - """Compares mypy results with list of known FAILING files. + """Compare mypy results with list of known FAILING files. Exits the process with non-zero exit code upon unexpected results. 
""" From e1a05bbd497b33f735c732e461e1ede9398445c4 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Tue, 8 Oct 2024 09:21:14 +0200 Subject: [PATCH 7/9] Put summary line on only one line (D205) --- benchmarks/benchmarks/benchmarks.py | 5 +- pymc/backends/base.py | 12 +++-- pymc/backends/ndarray.py | 8 ++- pymc/data.py | 6 +-- pymc/distributions/continuous.py | 13 ++--- pymc/distributions/dist_math.py | 11 ++--- pymc/distributions/distribution.py | 10 ++-- pymc/distributions/mixture.py | 1 + pymc/distributions/multivariate.py | 65 ++++++++++++------------ pymc/distributions/shape_utils.py | 5 +- pymc/distributions/simulator.py | 3 +- pymc/distributions/transforms.py | 15 +++--- pymc/distributions/truncated.py | 6 +-- pymc/func_utils.py | 3 +- pymc/gp/cov.py | 37 +++++++------- pymc/gp/gp.py | 58 +++++++++------------- pymc/gp/hsgp_approx.py | 55 +++++++++++++-------- pymc/gp/util.py | 4 +- pymc/initial_point.py | 4 +- pymc/logprob/basic.py | 3 +- pymc/logprob/transform_value.py | 3 +- pymc/logprob/transforms.py | 7 +-- pymc/math.py | 15 +++--- pymc/model/core.py | 66 ++++++++++--------------- pymc/model/fgraph.py | 4 +- pymc/printing.py | 18 ++++--- pymc/pytensorf.py | 11 ++--- pymc/sampling/parallel.py | 5 +- pymc/smc/kernels.py | 5 ++ pymc/step_methods/arraystep.py | 13 +++-- pymc/step_methods/compound.py | 4 +- pymc/step_methods/metropolis.py | 25 ++++------ pymc/testing.py | 14 +++--- pymc/util.py | 17 ++++--- pymc/variational/approximations.py | 25 ++++++---- pymc/variational/opvi.py | 65 +++++++++++------------- pymc/variational/updates.py | 1 + scripts/generate_pip_deps_from_conda.py | 7 +-- scripts/run_mypy.py | 5 +- 39 files changed, 301 insertions(+), 333 deletions(-) diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index f9913d8f7a3..7485cef65ee 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -77,10 +77,7 @@ def mixture_model(random_seed=1234): class OverheadSuite: - """ - Just tests how long sampling from a normal distribution takes for various - samplers. - """ + """Test how long sampling from a normal distribution takes for various samplers.""" params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice] timer = timeit.default_timer diff --git a/pymc/backends/base.py b/pymc/backends/base.py index 912aed842cf..c0239f8dec9 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -102,8 +102,12 @@ def _slice(self, idx: slice) -> "IBaseTrace": raise NotImplementedError() def point(self, idx: int) -> dict[str, np.ndarray]: - """Return dictionary of point values at `idx` for current chain - with variables names as keys. + """Return point values at `idx` for current chain. + + Returns + ------- + values : dict[str, np.ndarray] + Dictionary of values with variable names as keys. """ raise NotImplementedError() @@ -568,9 +572,7 @@ def points(self, chains=None): def _squeeze_cat(results, combine: bool, squeeze: bool): - """Squeeze and concatenate the results depending on values of - `combine` and `squeeze`. - """ + """Squeeze and/or concatenate the results.""" if combine: results = np.concatenate(results) if not squeeze: diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index 016a70d7d3a..98a11fdeca2 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -184,8 +184,12 @@ def _slice(self, idx: slice): return sliced def point(self, idx) -> dict[str, Any]: - """Return dictionary of point values at `idx` for current chain - with variable names as keys. 
+ """Return point values at `idx` for current chain. + + Returns + ------- + values : dict[str, Any] + Dictionary of values with variable names as keys. """ idx = int(idx) return {varname: values[idx] for varname, values in self.samples.items()} diff --git a/pymc/data.py b/pymc/data.py index 717a3e442ad..247825981f1 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -87,9 +87,9 @@ def clone(self): class GeneratorAdapter: - """ - Helper class that helps to infer data type of generator with looking - at the first item, preserving the order of the resulting generator. + """Class that helps infer data type of generator. + + It looks at the first item, preserving the order of the resulting generator. """ def make_variable(self, gop, name=None): diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 9ec2daa4df8..d72346ff0de 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -14,10 +14,7 @@ # Contains code from AePPL, Copyright (c) 2021-2022, Aesara Developers. -""" -A collection of common probability distributions for stochastic -nodes in PyMC. -""" +"""A collection of common probability distributions for stochastic nodes in PyMC.""" import warnings @@ -371,10 +368,7 @@ def rng_fn(cls, rng, size): class Flat(Continuous): - """ - Uninformative log-likelihood that returns 0 regardless of - the passed value. - """ + """Uninformative log-likelihood that returns 0 regardless of the passed value.""" rv_op = flat @@ -3767,8 +3761,7 @@ def rng_fn(cls, rng, x, pdf, cdf, size=None) -> np.ndarray: class Interpolated(BoundedContinuous): r""" - Univariate probability distribution defined as a linear interpolation - of probability density function evaluated on some lattice of points. + Univariate linear interpolation of pdf evaluated on some lattice of points. The lattice can be uneven, so the steps between different points can have different size and it is possible to vary the precision between regions diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index a93a9a52a67..49a900ac78d 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -172,16 +172,16 @@ def log_diff_normal_cdf(mu, sigma, x, y): def sigma2rho(sigma): - """ - `sigma -> rho` PyTensor converter + """Convert `sigma` into `rho` with PyTensor. + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. """ return pt.log(pt.exp(pt.abs(sigma)) - 1.0) def rho2sigma(rho): - """ - `rho -> sigma` PyTensor converter + """Convert `rho` to `sigma` with PyTensor. + :math:`mu + sigma*e = mu + log(1+exp(rho))*e`. """ return pt.softplus(rho) @@ -193,8 +193,7 @@ def rho2sigma(rho): def log_normal(x, mean, **kwargs): """ - Calculate logarithm of normal distribution at point `x` - with given `mean` and `std`. + Calculate logarithm of normal distribution at point `x` with given `mean` and `std`. Parameters ---------- diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 442f8ba1180..21d3a4d29ef 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -193,8 +193,9 @@ def support_point(op, rv, *dist_params): class _class_or_instancemethod(classmethod): - """Allow a method to be called both as a classmethod and an instancemethod, - giving priority to the instancemethod. + """Allow a method to be called both as a classmethod and an instancemethod. + + Priority is given to the instancemethod. 
This is used to allow extracting information from the signature of a SymbolicRandomVariable which may be provided either as a class attribute or as an instance attribute. @@ -580,10 +581,7 @@ def dist( @node_rewriter([SymbolicRandomVariable]) def inline_symbolic_random_variable(fgraph, node): - """ - Optimization that expands the internal graph of a SymbolicRV when obtaining the logp - graph, if the flag `inline_logprob` is True. - """ + """Expand a SymbolicRV when obtaining the logp graph if `inline_logprob` is True.""" op = node.op if op.inline_logprob: return clone_replace(op.inner_outputs, dict(zip(op.inner_inputs, node.inputs))) diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 1bcf42d7662..dc704e5121d 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -702,6 +702,7 @@ def dist(cls, psi, n, p, **kwargs): class ZeroInflatedNegativeBinomial: R""" Zero-Inflated Negative binomial log-likelihood. + The Zero-inflated version of the Negative Binomial (NB). The NB distribution describes a Poisson random variable whose rate parameter is gamma distributed. diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index dfada28845f..55e275be3eb 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -279,8 +279,7 @@ def support_point(rv, size, mu, cov): def logp(value, mu, cov): """ - Calculate log-probability of Multivariate Normal distribution - at specified value. + Calculate logp of Multivariate Normal distribution at specified value. Parameters ---------- @@ -469,8 +468,7 @@ def support_point(rv, size, nu, mu, scale): def logp(value, nu, mu, scale): """ - Calculate log-probability of Multivariate Student's T distribution - at specified value. + Calculate logp of Multivariate Student's T distribution at specified value. Parameters ---------- @@ -535,8 +533,7 @@ def support_point(rv, size, a): def logp(value, a): """ - Calculate log-probability of Dirichlet distribution - at specified value. + Calculate logp of Dirichlet distribution at specified value. Parameters ---------- @@ -642,8 +639,7 @@ def support_point(rv, size, n, p): def logp(value, n, p): """ - Calculate log-probability of Multinomial distribution - at specified value. + Calculate logp of Multinomial distribution at specified value. Parameters ---------- @@ -736,8 +732,7 @@ def support_point(rv, size, n, a): def logp(value, n, a): """ - Calculate log-probability of DirichletMultinomial distribution - at specified value. + Calculate logp of DirichletMultinomial distribution at specified value. Parameters ---------- @@ -773,6 +768,7 @@ def logp(value, n, a): class _OrderedMultinomial(Multinomial): r""" Underlying class for ordered multinomial distributions. + See docs for the OrderedMultinomial wrapper class for more details on how to use it in models. """ @@ -1013,8 +1009,7 @@ def dist(cls, nu, V, *args, **kwargs): def logp(X, nu, V): """ - Calculate log-probability of Wishart distribution - at specified value. + Calculate logp of Wishart distribution at specified value. Parameters ---------- @@ -1047,9 +1042,10 @@ def logp(X, nu, V): def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, initval=None): r""" - Bartlett decomposition of the Wishart distribution. As the Wishart - distribution requires the matrix to be symmetric positive semi-definite - it is impossible for MCMC to ever propose acceptable matrices. + Bartlett decomposition of the Wishart distribution. 
+ + As the Wishart distribution requires the matrix to be symmetric positive + semi-definite, it is impossible for MCMC to ever propose acceptable matrices. Instead, we can use the Barlett decomposition which samples a lower diagonal matrix. Specifically: @@ -1248,6 +1244,7 @@ def update(self, node): class _LKJCholeskyCov(Distribution): r"""Underlying class for covariance matrix with LKJ distributed correlations. + See docs for LKJCholeskyCov function for more details on how to use it in models. """ @@ -1599,8 +1596,7 @@ def support_point(rv, *args): def logp(value, n, eta): """ - Calculate log-probability of LKJ distribution at specified - value. + Calculate logp of LKJ distribution at specified value. Parameters ---------- @@ -1900,8 +1896,7 @@ def support_point(rv, size, mu, rowchol, colchol): def logp(value, mu, rowchol, colchol): """ - Calculate log-probability of Matrix-valued Normal distribution - at specified value. + Calculate logp of Matrix-valued Normal distribution at specified value. Parameters ---------- @@ -2083,8 +2078,7 @@ def support_point(rv, rng, size, mu, sigma, *covs): def logp(value, rng, size, mu, sigma, *covs): """ - Calculate log-probability of Multivariate Normal distribution - with Kronecker-structured covariance at specified value. + Calculate logp of Multivariate Normal distribution with Kronecker-structured covariance at specified value. Parameters ---------- @@ -2209,8 +2203,10 @@ def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size) class CAR(Continuous): r""" - Likelihood for a conditional autoregression. This is a special case of the - multivariate normal with an adjacency-structured covariance matrix. + Likelihood for a conditional autoregression. + + This is a special case of the multivariate normal with an + adjacency-structured covariance matrix. .. math:: @@ -2271,8 +2267,9 @@ def support_point(rv, size, mu, W, alpha, tau, W_is_valid): def logp(value, mu, W, alpha, tau, W_is_valid): """ - Calculate log-probability of a CAR-distributed vector - at specified value. This log probability function differs from + Calculate logp of a CAR-distributed vector at specified value. + + This log probability function differs from the true CAR log density (AKA a multivariate normal with CAR-structured covariance matrix) by an additive constant. @@ -2356,9 +2353,10 @@ def rng_fn(cls, rng, size, W, sigma, zero_sum_stdev): class ICAR(Continuous): r""" - The intrinsic conditional autoregressive prior. It is primarily used to model - covariance between neighboring areas. It is a special case - of the :class:`~pymc.CAR` distribution where alpha is set to 1. + The intrinsic conditional autoregressive prior. + + It is primarily used to model covariance between neighboring areas. It is a + special case of the :class:`~pymc.CAR` distribution where alpha is set to 1. The log probability density function is @@ -2541,7 +2539,9 @@ def rng_fn(cls, rng, alpha, K, size): class StickBreakingWeights(SimplexContinuous): r""" - Likelihood of truncated stick-breaking weights. The weights are generated from a + Likelihood of truncated stick-breaking weights. + + The weights are generated from a stick-breaking proceduce where :math:`x_k = v_k \prod_{\ell < k} (1 - v_\ell)` for :math:`k \in \{1, \ldots, K\}` and :math:`x_K = \prod_{\ell = 1}^{K} (1 - v_\ell) = 1 - \sum_{\ell=1}^K x_\ell` with :math:`v_k \stackrel{\text{i.i.d.}}{\sim} \text{Beta}(1, \alpha)`. 
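For reference, the `_LKJCholeskyCov` docstring touched above points to the `LKJCholeskyCov` wrapper for covariance-matrix priors. A brief sketch of typical usage, with placeholder hyperparameters and data that are not taken from the diff:

    import numpy as np
    import pymc as pm

    # Illustrative only: shapes, priors and observed data are made up.
    with pm.Model() as model:
        sd_dist = pm.Exponential.dist(1.0, shape=3)
        chol, corr, stds = pm.LKJCholeskyCov(
            "chol", n=3, eta=2.0, sd_dist=sd_dist, compute_corr=True
        )
        mu = pm.Normal("mu", 0.0, 1.0, shape=3)
        obs = pm.MvNormal("obs", mu=mu, chol=chol, observed=np.zeros((10, 3)))
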
@@ -2605,8 +2605,7 @@ def support_point(rv, size, alpha, K): def logp(value, alpha, K): """ - Calculate log-probability of the distribution induced from the stick-breaking process - at specified value. + Calculate logp of the distribution induced from the stick-breaking process at specified value. Parameters ---------- @@ -2688,8 +2687,8 @@ def rv_op(cls, sigma, support_shape, *, size=None, rng=None): class ZeroSumNormal(Distribution): r""" - ZeroSumNormal distribution, i.e Normal distribution where one or - several axes are constrained to sum to zero. + Normal distribution where one or several axes are constrained to sum to zero. + By default, the last axis is constrained to sum to zero. See `n_zerosum_axes` kwarg for more details. diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 10b84bb7a8d..09a8c00a241 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -A collection of common shape operations needed for broadcasting -samples from probability distributions for stochastic nodes in PyMC. -""" +"""Common shape operations to broadcast samples from probability distributions for stochastic nodes in PyMC.""" import warnings diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index a9531a88c22..de5da40a157 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -63,8 +63,7 @@ def sum_stat(cls, *args, **kwargs): class Simulator(Distribution): r""" - Simulator distribution, used for Approximate Bayesian Inference (ABC) - with Sequential Monte Carlo (SMC) sampling via :func:`~pymc.sample_smc`. + Used for Approximate Bayesian Inference with SMC sampling via :func:`~pymc.sample_smc`. Simulator distributions have a stochastic pseudo-loglikelihood defined by a distance metric between the observed and simulated data, and tweaked diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index 92aa52afd02..d29bb340229 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -116,8 +116,9 @@ def log_jac_det(self, value, *inputs): class SumTo1(Transform): """ - Transforms K - 1 dimensional simplex space (k values in [0,1] and that sum to 1) to a K - 1 vector of values in [0,1] - This Transformation operates on the last dimension of the input tensor. + Transforms K - 1 dimensional simplex space (K values in [0, 1] that sum to 1) to a K - 1 vector of values in [0, 1]. + + This transformation operates on the last dimension of the input tensor. """ name = "sumto1" @@ -139,15 +140,12 @@ def log_jac_det(self, value, *inputs): class CholeskyCovPacked(Transform): - """ - Transforms the diagonal elements of the LKJCholeskyCov distribution to be on the - log scale. - """ + """Transforms the diagonal elements of the LKJCholeskyCov distribution to be on the log scale.""" name = "cholesky-cov-packed" def __init__(self, n): - """ + """Create a CholeskyCovPack object. Parameters ---------- @@ -180,8 +178,7 @@ def log_jac_det(self, value, *inputs): class Interval(IntervalTransform): - """Wrapper around :class:`pymc.logprob.transforms.IntervalTransform` for use in the - ``transform`` argument of a random variable. + """Wrapper around :class:`pymc.logprob.transforms.IntervalTransform` for use in the ``transform`` argument of a random variable. 
Parameters ---------- diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 8fd380dfbba..2a74cfa2b8c 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -51,10 +51,7 @@ class TruncatedRV(SymbolicRandomVariable): - """ - An `Op` constructed from an PyTensor graph - that represents a truncated univariate random variable. - """ + """An `Op` constructed from an PyTensor graph that represents a truncated univariate random variable.""" default_output: int = 0 base_rv_op: Op @@ -232,6 +229,7 @@ def _truncated(op: Op, lower, upper, size, *params): class TruncationCheck(CheckAndRaise): """Implements a check in truncated graphs. + Raises `TruncationError` if the check is not True. """ diff --git a/pymc/func_utils.py b/pymc/func_utils.py index edcaf50952f..21492a34e74 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -37,8 +37,7 @@ def find_constrained_prior( **kwargs, ) -> dict[str, float]: """ - Find optimal parameters to get `mass` % of probability - of a :ref:`distribution ` between `lower` and `upper`. + Find optimal parameters to get `mass` % of probability of a distribution between `lower` and `upper`. Note: only works for one- and two-parameter distributions, as there are exactly two constraints. Fix some combination of parameters diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index e5b0f8cc61b..22406606b7c 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -148,8 +148,9 @@ def _alloc(X, *shape: int) -> TensorVariable: class Covariance(BaseCovariance): """ - Base class for kernels/covariance functions with input_dim and active_dims, which excludes - kernels like `Constant` and `WhiteNoise`. + Base class for kernels/covariance functions with input_dim and active_dims. + + This excludes kernels like `Constant` and `WhiteNoise`. Parameters ---------- @@ -173,9 +174,7 @@ def __init__(self, input_dim: int, active_dims: IntSequence | None = None): @property def n_dims(self) -> int: - """The dimensionality of the input, as taken from the - `active_dims`. - """ + """The dimensionality of the input, as taken from the `active_dims`.""" # Evaluate lazily in case this changes. return len(self.active_dims) @@ -234,9 +233,7 @@ def __init__(self, factor_list: Sequence): self._factor_list.append(factor) def _merge_factors_cov(self, X, Xs=None, diag=False): - """Evaluate either all the sums or all the - products of kernels that are possible to evaluate. - """ + """Evaluate either all the sums or all the products of kernels that are possible to evaluate.""" factor_list = [] for factor in self._factor_list: # make sure diag=True is handled properly @@ -560,8 +557,9 @@ def power_spectral_density(self, omega: TensorLike) -> TensorVariable: class ExpQuad(Stationary): r""" - The Exponentiated Quadratic kernel. Also referred to as the Squared - Exponential, or Radial Basis Function kernel. + The Exponentiated Quadratic kernel. + + Also referred to as the Squared Exponential, or Radial Basis Function kernel. .. math:: @@ -784,7 +782,8 @@ def full_from_distance(self, dist: TensorLike, squared: bool = False) -> TensorV return pt.exp(-0.5 * r2) def power_spectral_density_approx(self, J: TensorLike) -> TensorVariable: - r""" + r"""Power spectral density approximation. + Technically, this is not a spectral density but these are the first `m` coefficients of the low rank approximation for the periodic kernel, which are used in the same way. `J` is a vector of `np.arange(m)`. 
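The covariance-function docstrings above (e.g. `ExpQuad`) are normally combined with the `gp.Marginal` methods edited in the following hunks. A minimal sketch under assumed synthetic data and hyperpriors, not an excerpt from the patch:

    import numpy as np
    import pymc as pm

    # Synthetic data purely for illustration.
    rng = np.random.default_rng(0)
    X = np.linspace(0, 10, 50)[:, None]
    y = np.sin(X).ravel() + 0.1 * rng.normal(size=50)

    with pm.Model() as model:
        ell = pm.Gamma("ell", alpha=2.0, beta=1.0)
        eta = pm.HalfNormal("eta", sigma=1.0)
        cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ell)
        gp = pm.gp.Marginal(cov_func=cov)
        sigma = pm.HalfNormal("sigma", sigma=1.0)
        y_ = gp.marginal_likelihood("y_", X=X, y=y, sigma=sigma)
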
@@ -860,8 +859,7 @@ def diag(self, X: TensorLike) -> TensorVariable: class WarpedInput(Covariance): r""" - Warp the inputs of any kernel using an arbitrary function - defined using PyTensor. + Warp the inputs of any kernel using an arbitrary function defined using PyTensor. .. math:: k(x, x') = k(w(x), w(x')) @@ -972,8 +970,10 @@ def diag(self, X: TensorLike) -> TensorVariable: class Gibbs(Covariance): r""" - The Gibbs kernel. Use an arbitrary lengthscale function defined - using PyTensor. Only tested in one dimension. + The Gibbs kernel. + + Use an arbitrary lengthscale function defined using PyTensor. + Only tested in one dimension. .. math:: k(x, x') = \sqrt{\frac{2\ell(x)\ell(x')}{\ell^2(x) + \ell^2(x')}} @@ -1039,9 +1039,9 @@ def diag(self, X: TensorLike) -> TensorVariable: class ScaledCov(Covariance): r""" - Construct a kernel by multiplying a base kernel with a scaling - function defined using PyTensor. The scaling function is - non-negative, and can be parameterized. + Construct a kernel by multiplying a base kernel with a scaling function defined using PyTensor. + + The scaling function is non-negative, and can be parameterized. .. math:: k(x, x') = \phi(x) k_{\text{base}}(x, x') \phi(x') @@ -1091,6 +1091,7 @@ def full(self, X: TensorLike, Xs: TensorLike | None = None) -> TensorVariable: class Coregion(Covariance): r"""Covariance function for intrinsic/linear coregionalization models. + Adapted from GPy http://gpy.readthedocs.io/en/deploy/GPy.kern.src.html#GPy.kern.src.coregionalize.Coregionalize. This covariance has the form: diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 3445d355377..e08ebffbed6 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -177,8 +177,7 @@ def _build_prior( def prior(self, name, X, n_outputs=1, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Return the GP prior distribution evaluated over the input - locations `X`. + Return the GP prior distribution evaluated over the input locations `X`. This is the prior probability over the space of functions described by its mean and covariance function. @@ -250,8 +249,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" - Return the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the GP prior was over, the conditional distribution over a @@ -350,8 +348,7 @@ def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kw def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" - Return the TP prior distribution evaluated over the input - locations `X`. + Return the TP prior distribution evaluated over the input locations `X`. This is the prior probability over the space of functions described by its mean and covariance function. @@ -394,8 +391,7 @@ def _build_conditional(self, Xnew, X, f, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): R""" - Return the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the TP prior was over, the conditional distribution over a @@ -487,8 +483,7 @@ def marginal_likelihood( **kwargs, ): R""" - Return the marginal likelihood distribution, given the input - locations `X` and the data `y`. 
+ Return the marginal likelihood distribution, given the input locations `X` and the data `y`. This is the integral over the product of the GP prior and a normal likelihood. @@ -594,8 +589,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Return the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. Given a set of function values `f` that the GP prior was over, the conditional distribution over a set of new points, `f_*` is: @@ -641,9 +635,9 @@ def predict( model=None, ): R""" - Return the mean vector and covariance matrix of the conditional - distribution as numpy arrays, given a `point`, such as the MAP - estimate or a sample from a `trace`. + Return mean and covariance of the conditional distribution given a `point`. + + The `point` might be the MAP estimate or a sample from a trace. Parameters ---------- @@ -676,8 +670,7 @@ def predict( def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" - Return the mean vector and covariance matrix of the conditional - distribution as symbolic variables. + Return symbolic mean and covariance of the conditional distribution. Parameters ---------- @@ -814,9 +807,10 @@ def marginal_likelihood( self, name, X, Xu, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Return the approximate marginal likelihood distribution, given the input - locations `X`, inducing point locations `Xu`, data `y`, and white noise - standard deviations `sigma`. + Return the approximate marginal likelihood distribution. + + This is given the input locations `X`, inducing point locations `Xu`, + data `y`, and white noise standard deviations `sigma`. Parameters ---------- @@ -906,8 +900,7 @@ def conditional( self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs ): R""" - Return the approximate conditional distribution of the GP evaluated over - new input locations `Xnew`. + Return the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. Parameters ---------- @@ -1019,8 +1012,7 @@ def _build_prior(self, name, Xs, jitter, **kwargs): def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ - Return the prior distribution evaluated over the input - locations `Xs`. + Return the prior distribution evaluated over the input locations `Xs`. Parameters ---------- @@ -1065,8 +1057,7 @@ def _build_conditional(self, Xnew, jitter): def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): """ - Return the conditional distribution evaluated over new input - locations `Xnew`. + Return the conditional distribution evaluated over new input locations `Xnew`. `Xnew` will be split by columns and fed to the relevant covariance functions based on their `input_dim`. For example, if @@ -1188,8 +1179,7 @@ def _check_inputs(self, Xs, y): def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): """ - Return the marginal likelihood distribution, given the input - locations `cartesian(*Xs)` and the data `y`. + Return the marginal likelihood distribution, given the input locations `cartesian(*Xs)` and the data `y`. 
Parameters ---------- @@ -1267,8 +1257,7 @@ def _build_conditional(self, Xnew, diag, pred_noise): def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ - Return the conditional distribution evaluated over new input - locations `Xnew`, just as in `Marginal`. + Return the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. `Xnew` will be split by columns and fed to the relevant covariance functions based on their `input_dim`. For example, if @@ -1303,9 +1292,9 @@ def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" - Return the mean vector and covariance matrix of the conditional - distribution as numpy arrays, given a `point`, such as the MAP - estimate or a sample from a `trace`. + Return mean and covariance of the conditional distribution given a `point`. + + The `point` might be the MAP estimate or a sample from a trace. Parameters ---------- @@ -1329,8 +1318,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" - Return the mean vector and covariance matrix of the conditional - distribution as symbolic variables. + Return symbolic mean and covariance of the conditional distribution. Parameters ---------- diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index abb56adab28..73e80dfd67c 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -31,8 +31,10 @@ def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: - """Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be, - and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per + """Set the boundary using `X` and `c`. + + `X` can be centered around zero but doesn't have to be, and `c` is usually + a scalar multiplier greater than 1.0, but it may also be one value per dimension or column of `X`. """ # compute radius. Works whether X is 0-centered or not @@ -56,8 +58,9 @@ def calc_eigenvectors( eigvals: TensorLike, m: Sequence[int], ): - """Calculate eigenvectors of the Laplacian. These are used as basis vectors in the HSGP - approximation. + """Calculate eigenvectors of the Laplacian. + + These are used as basis vectors in the HSGP approximation. """ m_star = int(np.prod(m)) @@ -79,6 +82,7 @@ def calc_basis_periodic( ): """ Calculate basis vectors for the cosine series expansion of the periodic covariance function. + These are derived from the Taylor series representation of the covariance. """ w0 = (2 * np.pi) / period # angular frequency defining the periodicity @@ -323,14 +327,18 @@ def L(self, value: TensorLike): self._L = pt.as_tensor_variable(value) def prior_linearized(self, X: TensorLike): - """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root + """Linearized version of the HSGP. + + Returns the Laplace eigenfunctions and the square root of the power spectral density needed to create the GP. - This function allows the user to bypass the GP interface and work with the basis - and coefficients directly. This format allows the user to create predictions using - `pm.set_data` similarly to a linear model. It also enables computational speed ups in - multi-GP models, since they may share the same basis. The return values are the Laplace - eigenfunctions `phi`, and the square root of the power spectral density. 
+ This function allows the user to bypass the GP interface and work with + the basis and coefficients directly. This format allows the user to + create predictions using `pm.set_data` similarly to a linear model. It + also enables computational speed ups in multi-GP models, since they may + share the same basis. The return values are the Laplace eigenfunctions + `phi`, and the square root of the power spectral density. + An example is given below. Parameters @@ -427,6 +435,7 @@ def prior( ): # type: ignore R""" Return the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. Parameters @@ -489,8 +498,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Return the (approximate) conditional distribution evaluated over new input locations - `Xnew`. + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters ---------- @@ -599,16 +607,21 @@ def __init__( super().__init__(mean_func=mean_func, cov_func=cov_func) def prior_linearized(self, X: TensorLike): - """Linearized version of the approximation. Returns the cosine and sine bases and coefficients + """Linearized version of the approximation. + + Returns the cosine and sine bases and coefficients of the expansion needed to create the GP. - This function allows the user to bypass the GP interface and work directly with the basis - and coefficients directly. This format allows the user to create predictions using - `pm.set_data` similarly to a linear model. It also enables computational speed ups in - multi-GP models since they may share the same basis. + This function allows the user to bypass the GP interface and work + directly with the basis and coefficients directly. This format allows + the user to create predictions using `pm.set_data` similarly to a linear + model. It also enables computational speed ups in multi-GP models since + they may share the same basis. + + Correct results when using `prior_linearized` in tandem with + `pm.set_data` and `pm.MutableData` require that the `Xs` are + zero-centered, so its mean must be subtracted. - Correct results when using `prior_linearized` in tandem with `pm.set_data` and - `pm.MutableData` require that the `Xs` are zero-centered, so it's mean must be subtracted. An example is given below. Parameters @@ -685,6 +698,7 @@ def prior_linearized(self, X: TensorLike): def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ignore R""" Return the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. Parameters @@ -735,8 +749,7 @@ def _build_conditional(self, Xnew): def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None): # type: ignore R""" - Return the (approximate) conditional distribution evaluated over new input locations - `Xnew`. + Return the (approximate) conditional distribution evaluated over new input locations `Xnew`. Parameters ---------- diff --git a/pymc/gp/util.py b/pymc/gp/util.py index b7d074baa70..b2d7447b1c7 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -31,6 +31,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. + Uses untransformed versions of the inputs, performs some basic input validation. 
Parameters @@ -94,8 +95,7 @@ def stabilize(K, jitter=JITTER_DEFAULT): def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): R""" - Use the K-means algorithm to initialize the locations `X` for the inducing - points `fu`. + Use the K-means algorithm to initialize the locations `X` for the inducing points `fu`. Parameters ---------- diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 4c40d171cc6..15f4f887c0b 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -35,7 +35,9 @@ def convert_str_to_rv_dict( model, start: StartDict ) -> dict[TensorVariable, np.ndarray | Variable | str | None]: - """Convert a user-provided start dict with str keys of (transformed) variable names + """Convert a user-provided start dict to an untransformed RV start dict. + + Converts a dict of str keys of (transformed) variable names to a dict mapping the RV tensors to untransformed initvals. TODO: Deprecate this functionality and only accept TensorVariables as keys. diff --git a/pymc/logprob/basic.py b/pymc/logprob/basic.py index 138d24d013e..2e62660cbdd 100644 --- a/pymc/logprob/basic.py +++ b/pymc/logprob/basic.py @@ -414,8 +414,7 @@ def conditional_logp( extra_rewrites: GraphRewriter | NodeRewriter | None = None, **kwargs, ) -> dict[TensorVariable, TensorVariable]: - r"""Create a map between variables and conditional log-probabilities - such that the sum is their joint log-probability. + r"""Create a map between variables and conditional logps such that the sum is their joint logp. The `rv_values` dictionary specifies a joint probability graph defined by pairs of random variables and respective measure-space input parameters diff --git a/pymc/logprob/transform_value.py b/pymc/logprob/transform_value.py index f9c6f720443..f093ddbf205 100644 --- a/pymc/logprob/transform_value.py +++ b/pymc/logprob/transform_value.py @@ -209,7 +209,8 @@ def __init__( self, values_to_transforms: dict[TensorVariable, Transform | None], ): - """ + """Create the rewriter. + Parameters ---------- values_to_transforms diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index c7d1bde7706..41233223b46 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -136,8 +136,9 @@ def forward(self, value: TensorVariable, *inputs: Variable) -> TensorVariable: def backward( self, value: TensorVariable, *inputs: Variable ) -> TensorVariable | tuple[TensorVariable, ...]: - """Invert the transformation. Multiple values may be returned when the - transformation is not 1-to-1. + """Invert the transformation. + + Multiple values may be returned when the transformation is not 1-to-1. """ def log_jac_det(self, value: TensorVariable, *inputs) -> TensorVariable: @@ -847,7 +848,7 @@ class IntervalTransform(Transform): name = "interval" def __init__(self, args_fn: Callable[..., tuple[Variable | None, Variable | None]]): - """ + """Create the IntervalTransform object. Parameters ---------- diff --git a/pymc/math.py b/pymc/math.py index 705bb8df07e..3aba3931405 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -184,8 +184,9 @@ def kronecker(*Ks): - r"""Return the Kronecker product of arguments: - :math:`K_1 \otimes K_2 \otimes ... \otimes K_D`. + r"""Return the Kronecker product of arguments. + + math:`K_1 \otimes K_2 \otimes ... \otimes K_D` Parameters ---------- @@ -330,6 +331,7 @@ def log1mexp(x, *, negative_input=False): def log1mexp_numpy(x, *, negative_input=False): """Return log(1 - exp(x)). + This function is numerically more stable than the naive approach. 
For details, see @@ -364,9 +366,9 @@ def flatten_list(tensors): class LogDet(Op): - r"""Compute the logarithm of the absolute determinant of a square - matrix M, log(abs(det(M))) on the CPU. Avoids det(M) overflow/ - underflow. + r"""Compute the logarithm of the absolute determinant of a square matrix M, log(abs(det(M))) on the CPU. + + Avoids det(M) overflow/underflow. Notes ----- @@ -508,8 +510,7 @@ def batched_diag(C): def block_diagonal(matrices, sparse=False, format="csr"): - r"""See pt.slinalg.block_diag or - pytensor.sparse.basic.block_diag for reference. + r"""See pt.slinalg.block_diag or pytensor.sparse.basic.block_diag for reference. Parameters ---------- diff --git a/pymc/model/core.py b/pymc/model/core.py index 097327f9cfe..588fb3f5cf2 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -96,9 +96,7 @@ class ContextMeta(type): - """Functionality for objects that put themselves in a context using - the `with` statement. - """ + """Functionality for objects that put themselves in a context manager.""" def __new__(cls, name, bases, dct, **kwargs): """Add __enter__ and __exit__ methods to the class.""" @@ -128,9 +126,9 @@ def __init__(cls, name, bases, nmspc, context_class: type | None = None, **kwarg super().__init__(name, bases, nmspc) def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | None: - """Return the most recently pushed context object of type ``cls`` - on the stack, or ``None``. If ``error_if_none`` is True (default), - raise a ``TypeError`` instead of returning ``None``. + """Return the most recently pushed context object of type ``cls`` on the stack, or ``None``. + + If ``error_if_none`` is True (default), raise a ``TypeError`` instead of returning ``None``. """ try: candidate: T | None = cls.get_contexts()[-1] @@ -145,9 +143,7 @@ def get_context(cls, error_if_none=True, allow_block_model_access=False) -> T | return candidate def get_contexts(cls) -> list[T]: - """Return a stack of context instances for the ``context_class`` - of ``cls``. - """ + """Return a stack of context instances for the ``context_class`` of ``cls``.""" # This lazily creates the context class's contexts # thread-local object, as needed. This seems inelegant to me, # but since the context class is not guaranteed to exist when @@ -208,10 +204,7 @@ def __call__(cls, *args, **kwargs): def modelcontext(model: Optional["Model"]) -> "Model": - """ - Return the given model or, if none was supplied, try to find one in - the context stack. - """ + """Return the given model or, if None was supplied, try to find one in the context stack.""" if model is None: model = Model.get_context(error_if_none=False) @@ -855,23 +848,17 @@ def d2logp( @property def datalogp(self) -> Variable: - """PyTensor scalar of log-probability of the observed variables and - potential terms. - """ + """PyTensor scalar of log-probability of the observed variables and potential terms.""" return self.observedlogp + self.potentiallogp @property def varlogp(self) -> Variable: - """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic). - """ + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic).""" return self.logp(vars=self.free_RVs) @property def varlogp_nojac(self) -> Variable: - """PyTensor scalar of log-probability of the unobserved random variables - (excluding deterministic) without jacobian term. 
- """ + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic) without jacobian term.""" return self.logp(vars=self.free_RVs, jacobian=False) @property @@ -892,17 +879,12 @@ def potentiallogp(self) -> Variable: @property def value_vars(self): - """List of unobserved random variables used as inputs to the model's - log-likelihood (which excludes deterministics). - """ + """List of unobserved random variables used as inputs to the model's log-likelihood (which excludes deterministics).""" return [self.rvs_to_values[v] for v in self.free_RVs] @property def unobserved_value_vars(self): - """List of all random variables (including untransformed projections), - as well as deterministics used as inputs and outputs of the model's - log-likelihood graph. - """ + """List of all random variables (including untransformed projections), as well as deterministics used as inputs and outputs of the model's log-likelihood graph.""" vars = [] transformed_rvs = [] for rv in self.free_RVs: @@ -932,8 +914,9 @@ def continuous_value_vars(self): @property def basic_RVs(self): - """List of random variables the model is defined in terms of - (which excludes deterministics). + """List of random variables the model is defined in terms of. + + This excludes deterministics. These are the actual random variable terms that make up the "sample-space" graph (i.e. you can sample these graphs by compiling them @@ -1426,8 +1409,7 @@ def create_value_var( transform: Transform, value_var: Variable | None = None, ) -> TensorVariable: - """Create a ``TensorVariable`` that will be used as the random - variable's "value" in log-likelihood graphs. + """Create a ``TensorVariable`` that will be used as the random variable's "value" in log-likelihood graphs. In general, we'll call this type of variable the "value" variable. @@ -1716,8 +1698,7 @@ def compile_fn( return fn def profile(self, outs, *, n=1000, point=None, profile=True, **kwargs): - """Compiles and profiles an PyTensor function which returns ``outs`` and - takes values of model vars as a dict as an argument. + """Compile and profile a PyTensor function which returns ``outs`` and takes values of model vars as a dict as an argument. Parameters ---------- @@ -1789,8 +1770,7 @@ def eval_rv_shapes(self) -> dict[str, tuple[int, ...]]: return {name: tuple(shape) for name, shape in zip(names, f())} def check_start_vals(self, start, **kwargs): - r"""Check that the starting values for MCMC do not cause the relevant log probability - to evaluate to something invalid (e.g. Inf or NaN). + r"""Check that the logp is defined and finite at the starting point. Parameters ---------- @@ -2115,9 +2095,11 @@ def new_or_existing_block_model_access(*args, **kwargs): def set_data(new_data, model=None, *, coords=None): - """Set the value of one or more data container variables. Note that the shape is also - dynamic, it is updated when the value is changed. See the examples below for two common - use-cases that take advantage of this behavior. + """Set the value of one or more data container variables. + + Note that the shape is also dynamic, it is updated when the value is + changed. See the examples below for two common use-cases that take + advantage of this behavior. Parameters ---------- @@ -2226,7 +2208,9 @@ def compile_fn( def Point(*args, filter_model_vars=False, **kwargs) -> dict[VarName, np.ndarray]: - """Build a point. Uses same args as dict() does. + """Build a point. + + Uses same args as dict() does. Filters out variables not in the model. 
All keys are strings. Parameters diff --git a/pymc/model/fgraph.py b/pymc/model/fgraph.py index 6f1a1846cd4..78ad61306e3 100644 --- a/pymc/model/fgraph.py +++ b/pymc/model/fgraph.py @@ -30,9 +30,7 @@ class ModelVar(Op): - """A dummy Op that describes the purpose of a Model variable and contains - meta-information as additional inputs (value and dims). - """ + """A dummy Op that describes the purpose of a Model variable and contains meta-information as additional inputs (value and dims).""" def make_node(self, rv, *dims): assert isinstance(rv, Variable) diff --git a/pymc/printing.py b/pymc/printing.py index f90f7564a3b..946a8a213b6 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -37,8 +37,10 @@ def str_for_dist( dist: TensorVariable, formatting: str = "plain", include_params: bool = True ) -> str: - """Make a human-readable string representation of a Distribution in a model, either - LaTeX or plain, optionally with distribution parameter values included. + """Make a human-readable string representation of a Distribution in a model. + + This can be either LaTeX or plain, optionally with distribution parameter + values included. """ if include_params: if isinstance(dist.owner.op, RandomVariable) or getattr( @@ -98,8 +100,10 @@ def str_for_dist( def str_for_model(model: Model, formatting: str = "plain", include_params: bool = True) -> str: - """Make a human-readable string representation of Model, listing all random variables - and their distributions, optionally including parameter values. + """Make a human-readable string representation of Model. + + This lists all random variables and their distributions, optionally + including parameter values. """ # Wrap functions to avoid confusing typecheckers sfd = partial(str_for_dist, formatting=formatting, include_params=include_params) @@ -146,8 +150,10 @@ def str_for_potential_or_deterministic( include_params: bool = True, dist_name: str = "Deterministic", ) -> str: - """Make a human-readable string representation of a Deterministic or Potential in a model, either - LaTeX or plain, optionally with distribution parameter values included. + """Make a human-readable string representation of a Deterministic or Potential in a model. + + This can be either LaTeX or plain, optionally with distribution parameter + values included. """ print_name = var.name if var.name is not None else "" if "latex" in formatting: diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index 0af861e7282..6df6eaea1cf 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -470,8 +470,7 @@ def join_nonshared_inputs( make_inputs_shared: bool = False, ) -> tuple[list[TensorVariable], TensorVariable]: """ - Create new outputs and input TensorVariables where the non-shared inputs are joined - in a single raveled vector input. + Create new outputs and input TensorVariables where the non-shared inputs are joined in a single raveled vector input. Parameters ---------- @@ -634,9 +633,7 @@ def __call__(self, state): class CallableTensor: - """Turns a symbolic variable with one input into a function that returns symbolic arguments - with the one variable replaced with the input. - """ + """Turns a symbolic variable with one input into a function that returns symbolic arguments with the one variable replaced with the input.""" def __init__(self, tensor): self.tensor = tensor @@ -1087,9 +1084,7 @@ def constant_fold( def rewrite_pregrad(graph): - """Apply simplifying or stabilizing rewrites to graph that are safe to use - pre-grad. 
- """ + """Apply simplifying or stabilizing rewrites to graph that are safe to use pre-grad.""" return rewrite_graph(graph, include=("canonicalize", "stabilize")) diff --git a/pymc/sampling/parallel.py b/pymc/sampling/parallel.py index 191111101c8..6e19c4aeb0a 100644 --- a/pymc/sampling/parallel.py +++ b/pymc/sampling/parallel.py @@ -82,6 +82,7 @@ def rebuild_exc(exc, tb): class _Process: """Separate process for each chain. + We communicate with the main process using a pipe, and send finished samples using shared memory. """ @@ -277,9 +278,7 @@ def __init__( @property def shared_point_view(self): - """May only be written to or read between a `recv_draw` - call from the process and a `write_next` or `abort` call. - """ + """May only be written to or read between a `recv_draw` call from the process and a `write_next` or `abort` call.""" if not self._readable: raise RuntimeError() return self._point diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 5de78d0cbbd..608454ef3ce 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -364,6 +364,8 @@ class IMH(SMC_KERNEL): def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ + Create the Independent Metropolis-Hastings SMC kernel object. + Parameters ---------- correlation_threshold : float, default 0.01 @@ -470,6 +472,8 @@ class MH(SMC_KERNEL): def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ + Create a Metropolis-Hastings SMC kernel. + Parameters ---------- correlation_threshold : float, default 0.01 @@ -489,6 +493,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): def setup_kernel(self): """Proposal dist is just a Multivariate Normal with unit identity covariance. + Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()`. """ ndim = self.tempered_posterior.shape[1] diff --git a/pymc/step_methods/arraystep.py b/pymc/step_methods/arraystep.py index bddf02f1551..b7da80aee02 100644 --- a/pymc/step_methods/arraystep.py +++ b/pymc/step_methods/arraystep.py @@ -75,8 +75,10 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: class ArrayStepShared(BlockedStep): - """Faster version of ArrayStep that requires the substep method that does not wrap - the functions the step method uses. + """Faster version of ArrayStep. + + It requires the substep method that does not wrap the functions the step + method uses. Works by setting shared variables before using the step. This eliminates the mapping and unmapping overhead as well as moving fewer variables around. @@ -84,6 +86,8 @@ class ArrayStepShared(BlockedStep): def __init__(self, vars, shared, blocked=True, rng: RandomGenerator = None): """ + Create the ArrayStepShared object. + Parameters ---------- vars: list of sampling value variables @@ -122,14 +126,15 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: class PopulationArrayStepShared(ArrayStepShared): - """Version of ArrayStepShared that allows samplers to access the states - of other chains in the population. + """Version of ArrayStepShared that allows samplers to access the states of other chains in the population. Works by linking a list of Points that is updated as the chains are iterated. """ def __init__(self, vars, shared, blocked=True, rng: RandomGenerator = None): """ + Create the PopulationArrayStepShared object. 
+ Parameters ---------- vars: list of sampling value variables diff --git a/pymc/step_methods/compound.py b/pymc/step_methods/compound.py index 38effcb134c..253e0bd0447 100644 --- a/pymc/step_methods/compound.py +++ b/pymc/step_methods/compound.py @@ -234,9 +234,7 @@ def __init__(self, methods: list[StepMethodState]): class CompoundStep(WithSamplingState): - """Step method composed of a list of several other step - methods applied in sequence. - """ + """Step method composed of a list of several other step methods applied in sequence.""" _state_class = CompoundStepState diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index e0101b8fe3a..15b3a3b2bb4 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -323,8 +323,9 @@ def competence(var, has_grad): def tune(scale, acc_rate): """ - Tunes the scaling parameter for the proposal distribution - according to the acceptance rate over the last tune_interval. + Tune the scaling parameter for the proposal distribution. + + Uses the acceptance rate over the last tune_interval. Rate Variance adaptation ---- ------------------- @@ -456,10 +457,7 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: @staticmethod def competence(var): - """ - BinaryMetropolis is only suitable for binary (bool) - and Categorical variables with k=1. - """ + """BinaryMetropolis is only suitable for binary (bool) and Categorical variables with k=1.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, BernoulliRV): @@ -578,10 +576,7 @@ def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: @staticmethod def competence(var): - """ - BinaryMetropolis is only suitable for Bernoulli - and Categorical variables with k=2. - """ + """BinaryMetropolis is only suitable for Bernoulli and Categorical variables with k=2.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, BernoulliRV): @@ -755,10 +750,7 @@ def metropolis_proportional(self, q, logp, logp_curr, dim, k): @staticmethod def competence(var): - """ - CategoricalGibbsMetropolis is only suitable for Bernoulli and - Categorical variables. - """ + """CategoricalGibbsMetropolis is only suitable for Bernoulli and Categorical variables.""" distribution = getattr(var.owner, "op", None) if isinstance(distribution, CategoricalRV): @@ -1135,8 +1127,9 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: return RaveledVars(q_new, point_map_info), [stats] def stop_tuning(self): - """At the end of the tuning phase, this method removes the first x% of the history - so future proposals are not informed by unconverged tuning iterations. + """Remove the first x% of the history at the end of the tuning phase. + + This is so future proposals are not informed by unconverged tuning iterations. """ it = len(self._history) n_drop = int(self.tune_drop_fraction * it) diff --git a/pymc/testing.py b/pymc/testing.py index d4c625edcaf..943e2355e96 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -314,8 +314,7 @@ def check_logp( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar - results for valid values and parameters inside the supported edges. + Test PyMC logp and equivalent scipy logpmf/logpdf methods give similar results for valid values and parameters inside the supported edges. 
Edges are excluded by default, but can be artificially included by creating a domain with repeated values (e.g., `Domain([0, 0, .5, 1, 1]`) @@ -423,8 +422,7 @@ def check_logcdf( skip_paramdomain_outside_edge_test: bool = False, ) -> None: """ - Test PyMC logcdf and equivalent scipy logcdf methods give similar - results for valid values and parameters inside the supported edges. + Test PyMC logcdf and equivalent scipy logcdf methods give similar results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC logcdf and equivalent scipy logcdf methods give similar @@ -539,8 +537,7 @@ def check_icdf( n_samples: int = 100, ) -> None: """ - Test PyMC icdf and equivalent scipy icdf methods give similar - results for valid values and parameters inside the supported edges. + Test PyMC icdf and equivalent scipy icdf methods give similar results for valid values and parameters inside the supported edges. The following tests are performed by default: 1. Test PyMC icdf and equivalent scipy icdf (ppf) methods give similar @@ -798,8 +795,9 @@ def discrete_random_tester( class BaseTestDistributionRandom: """ - Base class for tests that new RandomVariables are correctly - implemented, and that the mapping of parameters between the PyMC + Base class for tests that new RandomVariables are correctly implemented. + + Also checks that the mapping of parameters between the PyMC Distribution and the respective RandomVariable is correct. Three default tests are provided which check: diff --git a/pymc/util.py b/pymc/util.py index 8f952e589cd..41520587cfc 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -87,8 +87,8 @@ def wrapped(self, *args, **kwargs): class treelist(list): - """A list that passes mutable extending operations used in Model - to parent list instance. + """A list that passes mutable extending operations used in Model to parent list instance. + Extending treelist you will also extend its parent. """ @@ -135,8 +135,8 @@ def __imul__(self, other) -> "treelist": class treedict(dict): - """A dict that passes mutable extending operations used in Model - to parent dict instance. + """A dict that passes mutable extending operations used in Model to parent dict instance. + Extending treedict you will also extend its parent. """ @@ -301,7 +301,8 @@ def chains_and_samples(data: xarray.Dataset | arviz.InferenceData) -> tuple[int, def hashable(a=None) -> int: """ - Hashes many kinds of objects, including some that are unhashable through the builtin `hash` function. + Hash many kinds of objects, including some that are unhashable through the builtin `hash` function. + Lists and tuples are hashed based on their elements. """ if isinstance(a, dict): @@ -395,8 +396,10 @@ def check_dist_not_registered(dist, model=None): def point_wrapper(core_function): - """Wrap an pytensor compiled function to be able to ingest point dictionaries whilst - ignoring the keys that are not valid inputs to the core function. + """ + Wrap a pytensor compiled function to ingest point dictionaries. + + It ignores the keys that are not valid inputs to the core function. 
""" ins = [i.name for i in core_function.maker.fgraph.inputs if not isinstance(i, SharedVariable)] diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index c382763bcb6..61940418b1c 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -40,10 +40,12 @@ @Group.register class MeanFieldGroup(Group): - R"""Mean Field approximation to the posterior where spherical Gaussian family - is fitted to minimize KL divergence from True posterior. It is assumed - that latent space variables are uncorrelated that is the main drawback - of the method. + """Mean Field approximation to the posterior. + + Spherical Gaussian family is fitted to minimize KL divergence from posterior. + + It is assumed that latent space variables are uncorrelated that is the main + drawback of the method. """ __param_spec__ = {"mu": ("d",), "rho": ("d",)} @@ -116,10 +118,12 @@ def symbolic_logq_not_scaled(self): @Group.register class FullRankGroup(Group): - """Full Rank approximation to the posterior where Multivariate Gaussian family - is fitted to minimize KL divergence from True posterior. In contrast to - MeanField approach correlations between variables are taken in account. The - main drawback of the method is computational cost. + """Full Rank approximation to the posterior. + + Multivariate Gaussian family is fitted to minimize KL divergence from posterior. + + In contrast to MeanField approach, correlations between variables are taken + into account. The main drawback of the method is its computational cost. """ __param_spec__ = {"mu": ("d",), "L_tril": ("int(d * (d + 1) / 2)",)} @@ -188,8 +192,9 @@ def symbolic_random(self): @Group.register class EmpiricalGroup(Group): - """Builds Approximation instance from a given trace, - it has the same interface as variational approximation. + """Builds Approximation instance from a given trace. + + It has the same interface as variational approximation. """ has_logq = False diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 9458c593875..b07b9ded840 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -R""" +R"""Operational Variational Inference. + Variational inference is a great approach for doing really complex, often intractable Bayesian inference in approximate form. Common methods (e.g. ADVI) lack from complexity so that approximate posterior does not @@ -219,8 +220,7 @@ def updates( more_replacements=None, total_grad_norm_constraint=None, ): - """Calculate gradients for objective function, test function and then - constructs updates for optimization step. + """Construct updates for optimization step after calculating gradients. Parameters ---------- @@ -776,7 +776,8 @@ def get_param_spec_for(cls, **kwargs): return res def _check_user_params(self, **kwargs): - R"""*Dev* - checks user params, allocates them if they are correct, returns True. + R"""*Dev* - check user params, if correct allocate them and return True. + If they are not present, returns False. Parameters @@ -967,8 +968,7 @@ def _new_initial(self, size, deterministic, more_replacements=None): @node_property def symbolic_random(self): - """*Dev* - abstract node that takes `self.symbolic_initial` and creates - approximate posterior that is parametrized with `self.params_dict`. 
+ """*Dev* - abstract node that takes `self.symbolic_initial` and creates approximate posterior that is parametrized with `self.params_dict`. Implementation should take in account `self.batched`. If `self.batched` is `True`, then `self.symbolic_initial` is 3d tensor, else 2d @@ -993,8 +993,7 @@ def set_size_and_deterministic( def set_size_and_deterministic( self, node: Variable | list[Variable], s, d: bool, more_replacements: dict | None = None ) -> Variable | list[Variable]: - """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node. + """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node. Parameters ---------- @@ -1025,7 +1024,8 @@ def to_flat_input(self, node): return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node): - """*Dev* - performs sampling of node applying independent samples from posterior each time. + """*Dev* - perform sampling of node applying independent samples from posterior each time. + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. """ node = self.to_flat_input(node) @@ -1042,7 +1042,8 @@ def sample(post, *_): return nodes def symbolic_single_sample(self, node): - """*Dev* - performs sampling of node applying single sample from posterior. + """*Dev* - perform sampling of node applying single sample from posterior. + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call with `size=1`. """ @@ -1051,8 +1052,7 @@ def symbolic_single_sample(self, node): return graph_replace(node, {self.input: random[0]}, strict=False) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): - """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag. + """*Dev* - create correct replacements for initial depending on sample size and deterministic flag. Parameters ---------- @@ -1098,10 +1098,7 @@ def symbolic_normalizing_constant(self): @node_property def symbolic_logq_not_scaled(self): - """*Dev* - symbolically computed logq for `self.symbolic_random` - computations can be more efficient since all is known beforehand including - `self.symbolic_random`. - """ + """*Dev* - symbolically computed logq for `self.symbolic_random` computations can be more efficient since all is known beforehand including `self.symbolic_random`.""" raise NotImplementedError # shape (s,) @node_property @@ -1143,9 +1140,7 @@ def mean(self) -> pt.TensorVariable: raise NotImplementedError() def var_to_data(self, shared: pt.TensorVariable) -> xarray.Dataset: - """Take a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in - `self.ordering`. - """ + """Take a flat 1-dimensional tensor variable and maps it to an xarray data set based on the information in `self.ordering`.""" # This is somewhat similar to `DictToArrayBijection.rmap`, which doesn't work here since we don't have # `RaveledVars` and need to take the information from `self.ordering` instead shared_nda = shared.eval() @@ -1252,6 +1247,7 @@ def scale_cost_to_minibatch(self, value): @node_property def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`. 
+ Here the effect is controlled by `self.scale_cost_to_minibatch`. """ t = pt.max( @@ -1326,23 +1322,17 @@ def _single_symbolic_varlogp_and_datalogp(self): @node_property def single_symbolic_varlogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` - is not needed and code can be optimized. - """ + """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` is not needed and code can be optimized.""" return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` - is not needed and code can be optimized. - """ + """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` is not needed and code can be optimized.""" return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` - is not needed and code can be optimized. - """ + """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` is not needed and code can be optimized.""" return self.single_symbolic_datalogp + self.single_symbolic_varlogp @node_property @@ -1368,8 +1358,7 @@ def replacements(self): ) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): - """*Dev* - creates correct replacements for initial depending on - sample size and deterministic flag. + """*Dev* - create correct replacements for initial depending on sample size and deterministic flag. Parameters ---------- @@ -1394,8 +1383,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) @pytensor.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): - """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or - :func:`symbolic_single_sample` new random generator can be allocated and applied to node. + """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node. Parameters ---------- @@ -1428,7 +1416,8 @@ def to_flat_input(self, node, more_replacements=None): return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node, more_replacements=None): - """*Dev* - performs sampling of node applying independent samples from posterior each time. + """*Dev* - perform sampling of node applying independent samples from posterior each time. + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call. """ node = self.to_flat_input(node) @@ -1443,7 +1432,8 @@ def sample(*post): return nodes def symbolic_single_sample(self, node, more_replacements=None): - """*Dev* - performs sampling of node applying single sample from posterior. + """*Dev* - perform sampling of node applying single sample from posterior. + Note that it is done symbolically and this node needs :func:`set_size_and_deterministic` call with `size=1`. """ @@ -1453,8 +1443,10 @@ def symbolic_single_sample(self, node, more_replacements=None): return graph_replace(node, dict(zip(inp, post)), strict=False) def get_optimization_replacements(self, s, d): - """*Dev* - optimizations for logP. If sample size is static and equal to 1: - then `pytensor.scan` MC estimate is replaced with single sample without call to `pytensor.scan`. 
+ """*Dev* - optimizations for logP. + + If sample size is static and equal to 1, then `pytensor.scan` MC + estimate is replaced with single sample without call to `pytensor.scan`. """ repl = collections.OrderedDict() # avoid scan if size is constant and equal to one @@ -1500,6 +1492,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No def rslice(self, name): """*Dev* - vectorized sampling for named random variable without call to `pytensor.scan`. + This node still needs :func:`set_size_and_deterministic` to be evaluated. """ diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index 256db4abca5..656dbd0429d 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -541,6 +541,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon=0.1, n_win=10): """Return a function that returns parameter updates. + Instead of accumulated estimate, uses running window. Parameters diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 7d72737b2d9..698d54a1d2d 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -95,8 +95,7 @@ def conda_package_to_pip(package): def main(conda_fname, pip_fname): """ - Generate the pip dependencies file from the conda file, or compare that - they are synchronized (``compare=True``). + Generate the pip dependencies file from the conda file. Parameters ---------- @@ -104,10 +103,6 @@ def main(conda_fname, pip_fname): Path to the conda file with dependencies (e.g. `environment.yml`). pip_fname : str Path to the pip file with dependencies (e.g. `requirements-dev.txt`). - compare : bool, default False - Whether to generate the pip file (``False``) or to compare if the - pip file has been generated with this script and the last version - of the conda file (``True``). Returns ------- diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 7620b025002..842fb0a1323 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -1,7 +1,8 @@ #!/usr/bin/env python """ -Invokes mypy and compare the reults with files in /pymc except tests -and a list of files that are known to fail. +Invoke mypy and compare the reults with files in /pymc. + +Excludes tests and a list of files that are known to fail. Exit code 0 indicates that there are no unexpected results. 
From 4a00b49a64829fdaaaec50f4d9449efe1f4f1883 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Tue, 8 Oct 2024 09:22:07 +0200 Subject: [PATCH 8/9] Use ruff to check for numpy-style docstrings --- pyproject.toml | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 770fd0e0405..a5efd4b7b52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,30 +44,15 @@ ignore = [ "RUF001", # String contains ambiguous character (such as Greek letters) "RUF002", # Docstring contains ambiguous character (such as Greek letters) "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` - "D100", - "D101", - "D102", - "D103", - "D104", - "D105", - "D107", - "D200", - "D202", - "D203", - "D204", - "D205", - "D209", - "D212", - "D213", - "D301", - "D400", - "D401", - "D403", - "D413", - "D415", - "D417", + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function ] +[tool.ruff.lint.pydocstyle] +convention = "numpy" + [tool.ruff.lint.isort] lines-between-types = 1 From 3c5364bbdf10cf2558883da9715bc7b3bbef8949 Mon Sep 17 00:00:00 2001 From: Virgile Andreani Date: Tue, 8 Oct 2024 19:01:45 +0200 Subject: [PATCH 9/9] Replace 'an pytensor' -> 'a pytensor' --- docs/source/contributing/developer_guide.md | 10 +++++----- docs/source/guides/Gaussian_Processes.rst | 2 +- .../source/learn/core_notebooks/Gaussian_Processes.rst | 2 +- docs/source/learn/core_notebooks/pymc_pytensor.ipynb | 2 +- pymc/distributions/custom.py | 10 +++++----- pymc/distributions/dist_math.py | 2 +- pymc/distributions/truncated.py | 2 +- pymc/logprob/rewriting.py | 2 +- pymc/model/core.py | 8 ++++---- pymc/pytensorf.py | 2 +- pymc/sampling/jax.py | 2 +- tests/test_pytensorf.py | 2 +- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/source/contributing/developer_guide.md b/docs/source/contributing/developer_guide.md index 257795b1403..7820b1f4388 100644 --- a/docs/source/contributing/developer_guide.md +++ b/docs/source/contributing/developer_guide.md @@ -34,7 +34,7 @@ $$ z \sim \text{Normal}(0, 5) $$ -A call to a {class}`~pymc.Distribution` constructor as shown above returns an PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. +A call to a {class}`~pymc.Distribution` constructor as shown above returns a PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. Under the hood, the variables are created through the {meth}`~pymc.Distribution.dist` API, which calls the {class}`~pytensor.tensor.random.basic.RandomVariable` {class}`~pytensor.graph.op.Op` corresponding to the distribution. At a high level of abstraction, the idea behind ``RandomVariable`` ``Op``s is to create symbolic variables (``TensorVariable``s) that can be associated with the properties of a probability distribution. @@ -134,7 +134,7 @@ model_logp # ==> -6.6973152 ## Behind the scenes of the ``logp`` function -The ``logp`` function is straightforward - it is an PyTensor function within each distribution. +The ``logp`` function is straightforward - it is a PyTensor function within each distribution. 
It has the following signature: :::{warning} @@ -277,7 +277,7 @@ as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with ``Factor`` basically `enable and assign the logp `__ -(represented as a tensor also) property to an PyTensor tensor (thus +(represented as a tensor also) property to a PyTensor tensor (thus making it a random variable). For a ``TransformedRV``, it transforms the distribution into a ``TransformedDistribution``, and then ``model.Var`` is called again to added the RV associated with the @@ -373,7 +373,7 @@ def logpt(self): return logp ``` -which returns an PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph). +which returns a PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph). You can evaluate or compile into a python callable (that you can pass numpy as input args). Note that the logp tensor depends on its input in the PyTensor graph, thus you cannot pass new tensor to generate a logp function. For similar reason, in PyMC we do graph copying a lot using pytensor.clone_replace to replace the inputs to a tensor. @@ -561,7 +561,7 @@ Moreover, transition kernels in TFP do not flatten the tensors, see eg docstring We love NUTS, or to be more precise Dynamic HMC with complex stopping rules. This part is actually all done outside of PyTensor, for NUTS, it includes: The leapfrog, dual averaging, tuning of mass matrix and step size, the tree building, sampler related statistics like divergence and energy checking. -We actually have an PyTensor version of HMC, but it has never been used, and has been removed from the main repository. +We actually have a PyTensor version of HMC, but it has never been used, and has been removed from the main repository. It can still be found in the [git history](https://github.com/pymc-devs/pymc/pull/3734/commits/0fdae8207fd14f66635f3673ef267b2b8817aa68), though. #### Variational Inference (VI) diff --git a/docs/source/guides/Gaussian_Processes.rst b/docs/source/guides/Gaussian_Processes.rst index 3d1fbc80b30..19f47c1f122 100644 --- a/docs/source/guides/Gaussian_Processes.rst +++ b/docs/source/guides/Gaussian_Processes.rst @@ -158,7 +158,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC random variables. If the inputs are an PyTensor tensor or a +also be PyMC random variables. If the inputs are a PyTensor tensor or a PyMC random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model. The diff --git a/docs/source/learn/core_notebooks/Gaussian_Processes.rst b/docs/source/learn/core_notebooks/Gaussian_Processes.rst index f076a6f6515..41cb5903c6b 100644 --- a/docs/source/learn/core_notebooks/Gaussian_Processes.rst +++ b/docs/source/learn/core_notebooks/Gaussian_Processes.rst @@ -155,7 +155,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC random variables. If the inputs are an PyTensor tensor or a +also be PyMC random variables. 
If the inputs are a PyTensor tensor or a PyMC random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model. The diff --git a/docs/source/learn/core_notebooks/pymc_pytensor.ipynb b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb index a5524fe9dfe..aad72316a35 100644 --- a/docs/source/learn/core_notebooks/pymc_pytensor.ipynb +++ b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb @@ -415,7 +415,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### What is in an PyTensor graph?\n", + "### What is in a PyTensor graph?\n", "\n", "The following diagram shows the basic structure of an `pytensor` graph.\n", "\n", diff --git a/pymc/distributions/custom.py b/pymc/distributions/custom.py index 1bec80f4880..3238680bb3f 100644 --- a/pymc/distributions/custom.py +++ b/pymc/distributions/custom.py @@ -510,9 +510,9 @@ class CustomDist: A callable that calculates the log probability of some given ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logp(value, *dist_params)``, where ``value`` is - an PyTensor tensor that represents the distribution value, and ``dist_params`` + a PyTensor tensor that represents the distribution value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an PyTensor tensor. + This function must return a PyTensor tensor. When the `dist` function is specified, PyMC will try to automatically infer the `logp` when this is not provided. @@ -523,9 +523,9 @@ class CustomDist: A callable that calculates the log cumulative log probability of some given ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logcdf(value, *dist_params)``, where ``value`` is - an PyTensor tensor that represents the distribution value, and ``dist_params`` + a PyTensor tensor that represents the distribution value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an PyTensor tensor. If ``None``, a ``NotImplementedError`` + This function must return a PyTensor tensor. If ``None``, a ``NotImplementedError`` will be raised when trying to compute the distribution's logcdf. support_point : Optional[Callable] A callable that can be used to compute the finete logp point of the distribution. @@ -550,7 +550,7 @@ class CustomDist: When specified, `ndim_supp` and `ndims_params` are not needed. See examples below. dtype : str The dtype of the distribution. All draws and observations passed into the - distribution will be cast onto this dtype. This is not needed if an PyTensor + distribution will be cast onto this dtype. This is not needed if a PyTensor dist function is provided, which should already return the right dtype! class_name : str Name for the class which will wrap the CustomDist methods. 
When not specified, diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index 49a900ac78d..1cdb3b29458 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -236,7 +236,7 @@ def log_normal(x, mean, **kwargs): class SplineWrapper(Op): - """Creates an PyTensor operation from scipy.interpolate.UnivariateSpline.""" + """Creates a PyTensor operation from scipy.interpolate.UnivariateSpline.""" __props__ = ("spline",) diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 2a74cfa2b8c..6f32918bbc5 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -51,7 +51,7 @@ class TruncatedRV(SymbolicRandomVariable): - """An `Op` constructed from an PyTensor graph that represents a truncated univariate random variable.""" + """An `Op` constructed from a PyTensor graph that represents a truncated univariate random variable.""" default_output: int = 0 base_rv_op: Op diff --git a/pymc/logprob/rewriting.py b/pymc/logprob/rewriting.py index f8036d08981..cd390e13a9d 100644 --- a/pymc/logprob/rewriting.py +++ b/pymc/logprob/rewriting.py @@ -199,7 +199,7 @@ def construct_ir_fgraph( A custom IR rewriter can be specified. By default, `logprob_rewrites_db.query(RewriteDatabaseQuery(include=["basic"]))` is used. - Our measurable IR takes the form of an PyTensor graph that is more-or-less + Our measurable IR takes the form of a PyTensor graph that is more-or-less equivalent to a given PyTensor graph (i.e. the keys of `rv_values`) but contains `Op`s that are subclasses of the `MeasurableOp` type in place of ones that do not inherit from `MeasurableOp` in the original diff --git a/pymc/model/core.py b/pymc/model/core.py index 588fb3f5cf2..48d2117eb26 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -216,7 +216,7 @@ def modelcontext(model: Optional["Model"]) -> "Model": class ValueGradFunction: - """Create an PyTensor function that computes a value and its gradient. + """Create a PyTensor function that computes a value and its gradient. Parameters ---------- @@ -593,7 +593,7 @@ def isroot(self): return self.parent is None def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): - """Compile an PyTensor function that computes logp and gradient. + """Compile a PyTensor function that computes logp and gradient. Parameters ---------- @@ -1660,7 +1660,7 @@ def compile_fn( point_fn: bool = True, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function. + """Compiles a PyTensor function. Parameters ---------- @@ -2177,7 +2177,7 @@ def compile_fn( model: Model | None = None, **kwargs, ) -> PointFunc | Function: - """Compiles an PyTensor function. + """Compiles a PyTensor function. 
Parameters ---------- diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index 6df6eaea1cf..213831c9f11 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -277,7 +277,7 @@ def cont_inputs(a): def floatX(X): - """Convert an PyTensor tensor or numpy array to pytensor.config.floatX type.""" + """Convert a PyTensor tensor or numpy array to pytensor.config.floatX type.""" try: return X.astype(pytensor.config.floatX) except AttributeError: diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index d38a77d941d..43e1baa87fa 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -122,7 +122,7 @@ def get_jaxified_graph( inputs: list[TensorVariable] | None = None, outputs: list[TensorVariable] | None = None, ) -> list[TensorVariable]: - """Compile an PyTensor graph into an optimized JAX function.""" + """Compile a PyTensor graph into an optimized JAX function.""" graph = _replace_shared_variables(outputs) if outputs is not None else None fgraph = FunctionGraph(inputs=inputs, outputs=graph, clone=True) diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index f0ae4355976..b3564cac1f4 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -277,7 +277,7 @@ def test_convert_generator_data(input_dtype): result = convert_generator_data(square_generator) apply = result.owner op = apply.op - # Make sure the returned object is an PyTensor TensorVariable + # Make sure the returned object is a PyTensor TensorVariable assert isinstance(result, TensorVariable) assert isinstance(op, GeneratorOp), f"It's a {type(apply)}" # There are no inputs - because it generates...
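
The `compile_fn` docstrings touched above describe compiling a PyTensor graph into a callable that ingests point dictionaries. A minimal sketch of that usage, with a toy model and data invented purely for illustration (not taken from the patch), might look like:

```python
import numpy as np
import pymc as pm

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("y", mu=mu, sigma=1.0, observed=np.array([0.1, -0.3, 0.2]))

    # compile_fn compiles the joint logp graph into a callable that can be
    # fed a point dictionary, such as the model's initial point
    logp_fn = model.compile_fn(model.logp())

print(logp_fn(model.initial_point()))
```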