Pydocstyle #7526

Merged · 9 commits · Oct 8, 2024
2 changes: 2 additions & 0 deletions benchmarks/benchmarks/__init__.py
@@ -11,3 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+"""Benchmarks for PyMC."""
11 changes: 4 additions & 7 deletions benchmarks/benchmarks/benchmarks.py
@@ -24,7 +24,7 @@


def glm_hierarchical_model(random_seed=123):
"""Sample glm hierarchical model to use in benchmarks"""
"""Sample glm hierarchical model to use in benchmarks."""
np.random.seed(random_seed)
data = pd.read_csv(pm.get_data("radon.csv"))
data["log_radon"] = data["log_radon"].astype(pytensor.config.floatX)
@@ -47,7 +47,7 @@ def glm_hierarchical_model(random_seed=123):


def mixture_model(random_seed=1234):
"""Sample mixture model to use in benchmarks"""
"""Sample mixture model to use in benchmarks."""
np.random.seed(random_seed)
size = 1000
w_true = np.array([0.35, 0.4, 0.25])
@@ -77,10 +77,7 @@ def mixture_model(random_seed=1234):


class OverheadSuite:
"""
Just tests how long sampling from a normal distribution takes for various
samplers
"""
"""Test how long sampling from a normal distribution takes for various samplers."""

params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice]
timer = timeit.default_timer
@@ -161,7 +158,7 @@ def time_glm_hierarchical(self):


class NUTSInitSuite:
"""Tests initializations for NUTS sampler on models"""
"""Tests initializations for NUTS sampler on models."""

timeout = 360.0
params = ("adapt_diag", "jitter+adapt_diag", "jitter+adapt_full", "adapt_full")
10 changes: 5 additions & 5 deletions docs/source/contributing/developer_guide.md
@@ -34,7 +34,7 @@
$$
z \sim \text{Normal}(0, 5)
$$

-A call to a {class}`~pymc.Distribution` constructor as shown above returns an PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on.
+A call to a {class}`~pymc.Distribution` constructor as shown above returns a PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on.
Under the hood, the variables are created through the {meth}`~pymc.Distribution.dist` API, which calls the {class}`~pytensor.tensor.random.basic.RandomVariable` {class}`~pytensor.graph.op.Op` corresponding to the distribution.

At a high level of abstraction, the idea behind ``RandomVariable`` ``Op``s is to create symbolic variables (``TensorVariable``s) that can be associated with the properties of a probability distribution.
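
For illustration, a minimal sketch of this behavior (assuming a recent PyMC release; output comments are approximate):

```python
import pymc as pm

# Distribution.dist() builds the underlying RandomVariable Op and returns
# the symbolic TensorVariable, without registering it in any model.
z = pm.Normal.dist(mu=0.0, sigma=5.0)
print(type(z))  # <class 'pytensor.tensor.variable.TensorVariable'>

# The variable carries the distribution's properties, e.g. its log-density:
print(pm.logp(z, 0.0).eval())  # logp of Normal(0, 5) evaluated at 0.0
```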
@@ -134,7 +134,7 @@
model_logp # ==> -6.6973152

## Behind the scenes of the ``logp`` function

-The ``logp`` function is straightforward - it is an PyTensor function within each distribution.
+The ``logp`` function is straightforward - it is a PyTensor function within each distribution.
It has the following signature:

:::{warning}
@@ -277,7 +277,7 @@
as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with

``Factor`` basically `enables and assigns the
logp <https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/model.py#L195-L276>`__
-(represented as a tensor also) property to an PyTensor tensor (thus
+(represented as a tensor also) property to a PyTensor tensor (thus
making it a random variable). For a ``TransformedRV``, it transforms the
distribution into a ``TransformedDistribution``, and then ``model.Var`` is
called again to add the RV associated with the
@@ -373,7 +373,7 @@ def logpt(self):
return logp
```

-which returns an PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph).
+which returns a PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph).
You can evaluate it or compile it into a Python callable (to which you can pass NumPy arrays as input).
Note that the logp tensor depends on its input in the PyTensor graph, thus you cannot pass a new tensor to generate a logp function.
For a similar reason, in PyMC we do graph copying a lot using pytensor.clone_replace to replace the inputs to a tensor.
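
As an illustration of that cloning idiom, a minimal sketch on plain PyTensor tensors (not PyMC's internal code; it relies only on the `pytensor.clone_replace` call the text above references):

```python
import pytensor
import pytensor.tensor as pt

x = pt.scalar("x")
y = x**2 + 1  # an existing graph, analogous to a logp tensor

# The graph is tied to its original input, so to feed a different
# tensor we clone the graph and swap the input:
x_new = pt.scalar("x_new")
y_new = pytensor.clone_replace(y, replace={x: x_new})

f = pytensor.function([x_new], y_new)
print(f(3.0))  # 10.0
```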
@@ -561,7 +561,7 @@
Moreover, transition kernels in TFP do not flatten the tensors, see eg docstring
We love NUTS, or to be more precise Dynamic HMC with complex stopping rules.
This part is actually all done outside of PyTensor, for NUTS, it includes:
The leapfrog, dual averaging, tuning of mass matrix and step size, the tree building, sampler related statistics like divergence and energy checking.
-We actually have an PyTensor version of HMC, but it has never been used, and has been removed from the main repository.
+We actually have a PyTensor version of HMC, but it has never been used, and has been removed from the main repository.
It can still be found in the [git history](https://github.com/pymc-devs/pymc/pull/3734/commits/0fdae8207fd14f66635f3673ef267b2b8817aa68), though.
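
For reference, the leapfrog step mentioned above looks roughly like this (a generic sketch assuming an identity mass matrix, not PyMC's actual implementation):

```python
def leapfrog(q, p, grad_logp, step_size, n_steps):
    """Integrate Hamiltonian dynamics: a half momentum step, alternating
    full position/momentum steps, and a final half momentum step."""
    p = p + 0.5 * step_size * grad_logp(q)
    for _ in range(n_steps - 1):
        q = q + step_size * p
        p = p + step_size * grad_logp(q)
    q = q + step_size * p
    p = p + 0.5 * step_size * grad_logp(q)
    return q, -p  # flip momentum so the proposal is symmetric
```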

#### Variational Inference (VI)
2 changes: 1 addition & 1 deletion docs/source/guides/Gaussian_Processes.rst
@@ -158,7 +158,7 @@
other type of random variable. The first argument is the name of the random
variable representing the function we are placing the prior over.
The second argument is the inputs to the function that the prior is over,
:code:`X`. The inputs are usually known and present in the data, but they can
-also be PyMC random variables. If the inputs are an PyTensor tensor or a
+also be PyMC random variables. If the inputs are a PyTensor tensor or a
PyMC random variable, the :code:`shape` needs to be given.
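
A short sketch of the calling pattern described above (hypothetical data; assumes the current PyMC GP API):

```python
import numpy as np
import pymc as pm

X = np.linspace(0, 10, 100)[:, None]  # known inputs from the data

with pm.Model():
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)
    gp = pm.gp.Latent(cov_func=cov_func)
    # First argument: the name of the random variable over the function;
    # second argument: the inputs the prior is over.
    f = gp.prior("f", X=X)
```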

Usually at this point, inference is performed on the model. The
2 changes: 1 addition & 1 deletion docs/source/learn/core_notebooks/Gaussian_Processes.rst
@@ -155,7 +155,7 @@
other type of random variable. The first argument is the name of the random
variable representing the function we are placing the prior over.
The second argument is the inputs to the function that the prior is over,
:code:`X`. The inputs are usually known and present in the data, but they can
-also be PyMC random variables. If the inputs are an PyTensor tensor or a
+also be PyMC random variables. If the inputs are a PyTensor tensor or a
PyMC random variable, the :code:`shape` needs to be given.

Usually at this point, inference is performed on the model. The
2 changes: 1 addition & 1 deletion docs/source/learn/core_notebooks/pymc_pytensor.ipynb
@@ -415,7 +415,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### What is in an PyTensor graph?\n",
"### What is in a PyTensor graph?\n",
"\n",
"The following diagram shows the basic structure of an `pytensor` graph.\n",
"\n",
2 changes: 2 additions & 0 deletions pymc/__init__.py
@@ -13,6 +13,8 @@
# limitations under the License.


"""PyMC: Bayesian Modeling and Probabilistic Programming in Python."""

import logging

_log = logging.getLogger(__name__)
6 changes: 3 additions & 3 deletions pymc/backends/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Storage backends for traces
"""Storage backends for traces.

The NDArray (pymc.backends.NDArray) backend holds the entire trace in memory.

@@ -101,7 +101,7 @@ def _init_trace(
model: Model,
trace_vars: list[TensorVariable] | None = None,
) -> BaseTrace:
"""Initializes a trace backend for a chain."""
"""Initialize a trace backend for a chain."""
strace: BaseTrace
if trace is None:
strace = NDArray(model=model, vars=trace_vars)
@@ -126,7 +126,7 @@ def init_traces(
model: Model,
trace_vars: list[TensorVariable] | None = None,
) -> tuple[RunType | None, Sequence[IBaseTrace]]:
"""Initializes a trace recorder for each chain."""
"""Initialize a trace recorder for each chain."""
if HAS_MCB and isinstance(backend, Backend):
return init_chain_adapters(
backend=backend,
24 changes: 17 additions & 7 deletions pymc/backends/base.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Base backend for traces
"""Base backend for traces.

See the docstring for pymc.backends for more information
"""
@@ -55,6 +55,7 @@ class IBaseTrace(ABC, Sized):
"""Sampler stats for each sampler."""

def __len__(self):
"""Length of the chain."""
raise NotImplementedError()

def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray:
@@ -101,8 +102,12 @@ def _slice(self, idx: slice) -> "IBaseTrace":
raise NotImplementedError()

def point(self, idx: int) -> dict[str, np.ndarray]:
"""Return dictionary of point values at `idx` for current chain
with variables names as keys.
"""Return point values at `idx` for current chain.

Returns
-------
values : dict[str, np.ndarray]
Dictionary of values with variable names as keys.
"""
raise NotImplementedError()

@@ -127,7 +132,7 @@ def close(self):


class BaseTrace(IBaseTrace):
"""Base trace object
"""Base trace object.

Parameters
----------
@@ -208,6 +213,7 @@ def setup(self, draws, chain, sampler_vars=None) -> None:
# Selection methods

def __getitem__(self, idx):
"""Get the sample at index `idx`."""
if isinstance(idx, slice):
return self._slice(idx)

@@ -339,6 +345,7 @@ def __init__(self, straces: Sequence[IBaseTrace]):
self._report = SamplerReport()

def __repr__(self):
"""Return a string representation of MultiTrace."""
template = "<{}: {} chains, {} iterations, {} variables>"
return template.format(self.__class__.__name__, self.nchains, len(self), len(self.varnames))

@@ -355,9 +362,11 @@ def report(self) -> SamplerReport:
return self._report

def __iter__(self):
"""Return an iterator of the MultiTrace."""
raise NotImplementedError

def __getitem__(self, idx):
"""Get the sample at index `idx`."""
if isinstance(idx, slice):
return self._slice(idx)

@@ -393,6 +402,7 @@ def __getitem__(self, idx):
_attrs = {"_straces", "varnames", "chains", "stat_names", "_report"}

def __getattr__(self, name):
"""Get the value of the attribute of name `name`."""
# Avoid infinite recursion when called before __init__
# variables are set up (e.g., when pickling).
if name in self._attrs:
Expand All @@ -412,6 +422,7 @@ def __getattr__(self, name):
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

def __len__(self):
"""Length of the chains."""
chain = self.chains[-1]
return len(self._straces[chain])

@@ -546,7 +557,7 @@ def point(self, idx: int, chain: int | None = None) -> dict[str, np.ndarray]:
return self._straces[chain].point(idx)

def points(self, chains=None):
"""Return an iterator over all or some of the sample points
"""Return an iterator over all or some of the sample points.

Parameters
----------
@@ -561,8 +572,7 @@


def _squeeze_cat(results, combine: bool, squeeze: bool):
"""Squeeze and concatenate the results depending on values of
`combine` and `squeeze`."""
"""Squeeze and/or concatenate the results."""
if combine:
results = np.concatenate(results)
if not squeeze:
5 changes: 3 additions & 2 deletions pymc/backends/mcbackend.py
@@ -43,7 +43,7 @@


def find_data(pmodel: Model) -> list[mcb.DataVariable]:
"""Extracts data variables from a model."""
"""Extract data variables from a model."""
observed_rvs = {pmodel.rvs_to_values[rv] for rv in pmodel.observed_RVs}
dvars = []
# All data containers are named vars!
@@ -124,13 +124,14 @@ def record(self, draw: Mapping[str, np.ndarray], stats: Sequence[Mapping[str, An
return self._chain.append(value_dict, stats_dict)

def __len__(self):
"""Length of the chain."""
return len(self._chain)

def get_values(self, varname: str, burn=0, thin=1) -> np.ndarray:
return self._chain.get_draws(varname, slice(burn, None, thin))

def _get_stats(self, fname: str, slc: slice) -> np.ndarray:
"""Wraps `self._chain.get_stats` but unpickles automatically."""
"""Wrap `self._chain.get_stats` but unpickle automatically."""
values = self._chain.get_stats(fname, slc)
# Unpickle object stats
if fname in self._statsbj.object_stats:
15 changes: 10 additions & 5 deletions pymc/backends/ndarray.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""NumPy array trace backend
"""NumPy array trace backend.

Store sampling values in memory as a NumPy array.
"""
@@ -27,7 +27,7 @@


class NDArray(base.BaseTrace):
"""NDArray trace object
"""NDArray trace object.

Parameters
----------
@@ -138,6 +138,7 @@ def close(self):
# Selection methods

def __len__(self):
"""Length of the chain."""
if not self.samples: # `setup` has not been called.
return 0
return self.draw_idx
@@ -183,8 +184,12 @@ def _slice(self, idx: slice):
return sliced

def point(self, idx) -> dict[str, Any]:
"""Return dictionary of point values at `idx` for current chain
with variable names as keys.
"""Return point values at `idx` for current chain.

Returns
-------
values : dict[str, Any]
Dictionary of values with variable names as keys.
"""
idx = int(idx)
return {varname: values[idx] for varname, values in self.samples.items()}
@@ -212,7 +217,7 @@ def _slice_as_ndarray(strace, idx):
def point_list_to_multitrace(
point_list: list[dict[str, np.ndarray]], model: Model | None = None
) -> MultiTrace:
"""transform point list into MultiTrace"""
"""Transform point list into MultiTrace."""
_model = modelcontext(model)
varnames = list(point_list[0].keys())
with _model:
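
Hypothetical usage of this helper (a sketch with a trivial model):

```python
import numpy as np
import pymc as pm
from pymc.backends.ndarray import point_list_to_multitrace

with pm.Model() as model:
    x = pm.Normal("x")

points = [{"x": np.array(0.0)}, {"x": np.array(0.5)}]
mtrace = point_list_to_multitrace(points, model=model)
print(mtrace["x"])  # the draws recovered from the point list
```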
2 changes: 1 addition & 1 deletion pymc/backends/report.py
@@ -43,7 +43,7 @@ def ok(self):

@property
def n_tune(self) -> int | None:
"""Number of tune iterations - not necessarily kept in trace!"""
"""Number of tune iterations - not necessarily kept in trace."""
return self._n_tune

@property
10 changes: 2 additions & 8 deletions pymc/blocking.py
@@ -12,11 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""
pymc.blocking

Classes for working with subsets of parameters.
"""
"""Classes for working with subsets of parameters."""

from __future__ import annotations

@@ -51,9 +47,7 @@ class RaveledVars(NamedTuple):


class Compose(Generic[T]):
"""
Compose two functions in a pickleable way
"""
"""Compose two functions in a pickleable way."""

def __init__(self, fa: Callable[[PointType], T], fb: Callable[[RaveledVars], PointType]):
self.fa = fa