Add ruff and codespell to pre-commit hooks + perform associated fixes
antoinecollet5 authored Oct 3, 2023
1 parent afc6588 commit 627dc84
Showing 18 changed files with 198 additions and 78 deletions.
17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
@@ -22,3 +22,20 @@ repos:
^docs
)
args: [--strict, --ignore-missing-imports, --no-warn-unused-ignores]

- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
files: ^.*\.(py|c|h|md|rst|yml)$
args: [
"docs tests",
"*.py *.rst *.md",
]

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.289
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
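The hooks above are what drive the "associated fixes" in the rest of this commit. A couple of purely illustrative examples (not taken from the repository) of the kind of change ruff's autofix makes:

```python
# Illustrative only -- not from the repository.  Typical fixes applied by the
# ruff hook configured above.
import numpy as np

# F541: an f-string without any placeholders becomes a plain string
desc = f"Running forward model."  # before (flagged by ruff)
desc = "Running forward model."   # after (rewritten by `ruff --fix`)

# E741: ambiguous variable names such as `I` are flagged; in this commit they
# are kept and silenced explicitly rather than renamed
I = np.identity(10)  # noqa: E741
```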
2 changes: 2 additions & 0 deletions README.md
@@ -1,6 +1,8 @@
Iterative Ensemble Smoother
===========================

[![Precommit: enabled](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![docs](https://readthedocs.org/projects/iterative_ensemble_smoother/badge/?version=latest&style=plastic)](https://iterative-ensemble-smoother.readthedocs.io/)

13 changes: 7 additions & 6 deletions docs/source/Oscillator.py
@@ -12,13 +12,14 @@
# language: python
# name: python3
# ---

# ruff: noqa: E402
# %% [markdown]
# # Estimating parameters of an anharmonic oscillator
#
# The anharmonic oscillator can be modelled by a non-linear partial differential
# equation as described in section 6.3.4 of the book [Fundamentals of Algorithms
# and Data Assimilation](https://www.amazon.com/Data-Assimilation-Methods-Algorithms-Applications/dp/1611974534) by Mark Asch, Marc Bocquet and Maëlle Nodet.
# and Data Assimilation](https://www.amazon.com/Data-Assimilation-Methods-Algorithms-Applications/dp/1611974534)
# by Mark Asch, Marc Bocquet and Maëlle Nodet.
#
# -------------
#
@@ -53,11 +54,11 @@
#
#

# %%
from matplotlib import pyplot as plt
#%%
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats
from scipy.special import erf

import iterative_ensemble_smoother as ies

rng = np.random.default_rng(12345)
@@ -168,7 +169,7 @@ def forward_model(A, response_x_axis):
# The trick is to sample $x \sim \mathcal{N}(0, 1)$,
# then define a function $f$ that maps from standard normal to the
# exponential distribution.
# This funciton can be constructed by first mapping from standard normal to
# This function can be constructed by first mapping from standard normal to
# the interval $[0, 1)$ using the CDF, then mapping to exponential using the
# quantile function (inverse CDF) of the exponential distribution.
#
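The inverse-transform trick described above can be sketched in a few lines. The snippet below is illustrative rather than part of the notebook; the exponential scale parameter is an assumption:

```python
# Illustrative sketch: map standard-normal samples to an exponential
# distribution via the normal CDF followed by the exponential quantile
# function (inverse CDF), as described in the comment above.
import numpy as np
from scipy import stats

rng = np.random.default_rng(12345)

x = rng.standard_normal(1000)        # x ~ N(0, 1)
u = stats.norm.cdf(x)                # map to the unit interval
y = stats.expon.ppf(u, scale=1.0)    # map to Exp with mean `scale` (assumed 1.0)

print(y.mean())  # roughly equal to `scale`
```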
18 changes: 11 additions & 7 deletions docs/source/Polynomial.py
@@ -12,16 +12,21 @@
# language: python
# name: python3
# ---
# ruff: noqa: E402
# ruff: noqa: E501

# %% [markdown]
# # Fitting a polynomial with Gaussian priors
#
# We fit a simple polynomial with Gaussian priors, which is an example of a Gauss-linear problem for which the results obtained using Subspace Iterative Ensemble Smoother (SIES) tend to those obtained using Ensemble Smoother (ES).
# We fit a simple polynomial with Gaussian priors, which is an example of a Gauss-linear
# problem for which the results obtained using Subspace Iterative Ensemble Smoother
# (SIES) tend to those obtained using Ensemble Smoother (ES).
# This notebook illustrates this property.
# %%
import itertools

import numpy as np
import pandas as pd
import itertools

np.set_printoptions(suppress=True)
rng = np.random.default_rng(12345)
@@ -32,9 +37,8 @@

plt.rcParams["figure.figsize"] = (6, 6)
plt.rcParams.update({"font.size": 10})
from ipywidgets import interact
import ipywidgets as widgets

from ipywidgets import interact # noqa # isort:skip
import ipywidgets as widgets # noqa # isort:skip
from p_tqdm import p_map

import iterative_ensemble_smoother as ies
@@ -112,7 +116,7 @@ def poly(a, b, c, x):
coeff_b,
coeff_c,
[np.arange(max(x_observations) + 1)] * ensemble_size,
desc=f"Running forward model.",
desc="Running forward model.",
)

# %% [markdown]
@@ -228,7 +232,7 @@ def plot_posterior(ax, posterior, method):
)

# Plot the posterior
ax2.set_title(f"ES ert posterior")
ax2.set_title("ES ert posterior")
ax2.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color="black")
for parameter_posterior in X_ES_ert.T:
ax2.plot(
53 changes: 53 additions & 0 deletions pyproject.toml
@@ -75,3 +75,56 @@ write_to = "src/iterative_ensemble_smoother/_version.py"
testpaths = [
"tests"
]

[tool.ruff]
select = [
# Pyflakes
"F",
# Pycodestyle
"E",
"W",
# isort
"I"
]
src = ["src", "tests", "docs"]

# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["A", "B", "C", "D", "E", "F", "I"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
]

# Same as Black.
line-length = 88

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.10.
target-version = "py310"

[tool.ruff.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10
12 changes: 9 additions & 3 deletions setup.cfg
@@ -1,3 +1,9 @@
[flake8]
ignore = E302, W503, E501, E741, E203, F405
max-line-length = 88
[codespell]
skip = *.pyc,*.gif,*.png,*.jpg,*.ply, ./bibliography.bib,*.ipynb
ignore-words-list = lod,byteorder,flem,parm,doubleclick,revered,inout,fro,nd,sies,hist,ans
quiet-level = 3

[pylint.LOGGING]
# Format style used to check logging format string. `old` means using %
# formatting, `new` is for `{}` formatting, and `fstr` is for f-strings.
logging-format-style=fstr
1 change: 1 addition & 0 deletions src/iterative_ensemble_smoother/__init__.py
@@ -9,6 +9,7 @@
except ImportError:
__version__ = "unknown version"
version_tuple = (0, 0, "unknown version", "unknown commit")

from iterative_ensemble_smoother._iterative_ensemble_smoother import ES, SIES
from iterative_ensemble_smoother.esmda import ESMDA

22 changes: 13 additions & 9 deletions src/iterative_ensemble_smoother/_iterative_ensemble_smoother.py
@@ -1,32 +1,34 @@
from __future__ import annotations
from typing import Optional, TYPE_CHECKING, Callable

from typing import TYPE_CHECKING, Callable, Optional

import numpy as np

if TYPE_CHECKING:
import numpy.typing as npt

from iterative_ensemble_smoother.ies import create_coefficient_matrix
from iterative_ensemble_smoother.utils import (
_validate_inputs,
covariance_to_correlation,
steplength_exponential,
response_projection,
steplength_exponential,
)

from iterative_ensemble_smoother.ies import create_coefficient_matrix


class SIES:
"""
Initialize a Subspace Iterative Ensemble Smoother (SIES) instance.
This is an implementation of the algorithm described in the paper:
Efficient Implementation of an Iterative Ensemble Smoother for Data Assimilation and Reservoir History Matching
written by Evensen et al (2019), URL: https://www.frontiersin.org/articles/10.3389/fams.2019.00047/full
Efficient Implementation of an Iterative Ensemble Smoother for Data Assimilation
and Reservoir History Matching written by Evensen et al (2019),
URL: https://www.frontiersin.org/articles/10.3389/fams.2019.00047/full
The default step length is described in equation (49) in the paper
Formulating the history matching problem with consistent error statistics
written by Geir Evensen (2021), URL: https://link.springer.com/article/10.1007/s10596-021-10032-7
written by Geir Evensen (2021),
URL: https://link.springer.com/article/10.1007/s10596-021-10032-7
Parameters
----------
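For reference, the step-length rule cited above decays exponentially from a maximum towards a minimum value as the iterations proceed. A minimal sketch is given below; the parameter names and default values are illustrative assumptions, not necessarily the signature of `steplength_exponential` in this package:

```python
# Illustrative sketch of an exponentially decaying step length, in the spirit
# of equation (49) in Evensen (2021).  Names and defaults are assumptions.
def steplength_exponential_sketch(
    iteration: int,
    min_steplength: float = 0.3,
    max_steplength: float = 0.6,
    halflife: float = 1.5,
) -> float:
    """Decay from max_steplength (iteration 1) towards min_steplength."""
    assert iteration >= 1
    decay = 2.0 ** (-(iteration - 1) / halflife)
    return min_steplength + (max_steplength - min_steplength) * decay

print([round(steplength_exponential_sketch(i), 3) for i in range(1, 6)])
```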
@@ -110,7 +112,8 @@ def fit(
Parameters
----------
response_ensemble : npt.NDArray[np.double]
A 2D array of reponses from the model g(X) of shape (observations, ensemble_size).
A 2D array of responses from the model g(X) of shape
(observations, ensemble_size).
This matrix is Y in Evensen (2019).
observation_errors : npt.NDArray[np.double]
Either a 1D array of standard deviations, or a 2D covariance matrix.
@@ -254,7 +257,8 @@ def fit(

if np.isnan(W).sum() != 0:
raise ValueError(
"Fit produces NaNs. Check your response matrix for outliers or use an inversion type with truncation."
"Fit produces NaNs. Check your response matrix for outliers "
"or use an inversion type with truncation."
)

self.iteration += 1
10 changes: 6 additions & 4 deletions src/iterative_ensemble_smoother/esmda.py
@@ -21,7 +21,6 @@
https://helper.ipam.ucla.edu/publications/oilws3/oilws3_14147.pdf
"""

import numbers
from typing import Optional, Union

@@ -106,16 +105,19 @@ def __init__(
isinstance(seed, (int, np.random._generator.Generator)) or seed is None
):
raise TypeError(
"Argument `seed` must be an integer or numpy.random._generator.Generator."
"Argument `seed` must be an integer "
"or numpy.random._generator.Generator."
)

if not isinstance(inversion, str):
raise TypeError(
f"Argument `inversion` must be a string in {tuple(self._inversion_methods.keys())}"
"Argument `inversion` must be a string in "
f"{tuple(self._inversion_methods.keys())}"
)
if inversion not in self._inversion_methods.keys():
raise ValueError(
f"Argument `inversion` must be a string in {tuple(self._inversion_methods.keys())}"
"Argument `inversion` must be a string in "
f"{tuple(self._inversion_methods.keys())}"
)

# Store data
29 changes: 21 additions & 8 deletions src/iterative_ensemble_smoother/esmda_inversion.py
@@ -135,8 +135,10 @@ def singular_values_to_keep(
#
# C_MD @ inv(C_DD + alpha * C_D) @ (D - Y)
#
# where C_MD = empirical_cross_covariance(X, Y) = center(X) @ center(Y).T / (X.shape[1] - 1)
# C_DD = empirical_cross_covariance(Y, Y) = center(Y) @ center(Y).T / (Y.shape[1] - 1)
# where C_MD = empirical_cross_covariance(X, Y) =
# center(X) @ center(Y).T / (X.shape[1] - 1)
# C_DD = empirical_cross_covariance(Y, Y) =
# center(Y) @ center(Y).T / (Y.shape[1] - 1)
#
# The methods can be classified as
# - exact : with truncation=1.0, these methods compute the exact solution
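To make the comment above concrete, here is a small self-contained NumPy sketch of the naive update built from those empirical covariances. The helper name mirrors the comment and the array shapes are illustrative; this is not the module's API:

```python
# Illustrative sketch of  C_MD @ inv(C_DD + alpha * C_D) @ (D - Y)  using the
# empirical (cross-)covariances defined in the comment above.
import numpy as np

def empirical_cross_covariance(A, B):
    """center(A) @ center(B).T / (A.shape[1] - 1)"""
    A_c = A - A.mean(axis=1, keepdims=True)
    B_c = B - B.mean(axis=1, keepdims=True)
    return A_c @ B_c.T / (A.shape[1] - 1)

rng = np.random.default_rng(0)
num_params, num_obs, num_ens = 3, 5, 50
X = rng.normal(size=(num_params, num_ens))  # parameter ensemble
Y = rng.normal(size=(num_obs, num_ens))     # responses g(X)
D = rng.normal(size=(num_obs, num_ens))     # perturbed observations
C_D = np.eye(num_obs)                       # observation error covariance
alpha = 1.0

C_MD = empirical_cross_covariance(X, Y)
C_DD = empirical_cross_covariance(Y, Y)
X_posterior = X + C_MD @ np.linalg.inv(C_DD + alpha * C_D) @ (D - Y)
```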
@@ -312,7 +314,9 @@ def inversion_exact_rescaled(
X_shift = (X - np.mean(X, axis=1, keepdims=True)) / (N_e - 1)
Y_shift = Y - np.mean(Y, axis=1, keepdims=True)

return np.linalg.multi_dot([X_shift, Y_shift.T, term / s_r, term.T, (D - Y)]) # type: ignore
return np.linalg.multi_dot( # type: ignore
[X_shift, Y_shift.T, term / s_r, term.T, (D - Y)]
)


def inversion_exact_subspace_woodbury(
@@ -339,7 +343,8 @@
"""

# Woodbury: (A + U @ U.T)^-1 = A^-1 - A^-1 @ U @ (1 + U.T @ A^-1 @ U )^-1 @ U.T @ A^-1
# Woodbury:
# (A + U @ U.T)^-1 = A^-1 - A^-1 @ U @ (1 + U.T @ A^-1 @ U )^-1 @ U.T @ A^-1

# Compute D_delta. N_n = number of outputs, N_e = number of ensemble members
N_n, N_e = Y.shape
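As a quick, purely illustrative check of the Woodbury identity quoted above (random A and U, unrelated to the ESMDA quantities):

```python
# Numerical check of
#   (A + U @ U.T)^-1 == A^-1 - A^-1 @ U @ (I + U.T @ A^-1 @ U)^-1 @ U.T @ A^-1
import numpy as np

rng = np.random.default_rng(0)
n, k = 6, 2
A = np.diag(rng.uniform(1.0, 2.0, size=n))  # easy-to-invert diagonal matrix
U = rng.normal(size=(n, k))

lhs = np.linalg.inv(A + U @ U.T)
A_inv = np.linalg.inv(A)
rhs = A_inv - A_inv @ U @ np.linalg.inv(np.eye(k) + U.T @ A_inv @ U) @ U.T @ A_inv

assert np.allclose(lhs, rhs)
```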
@@ -364,7 +369,9 @@

# Compute the woodbury inversion, then return
inverted = C_D_inv - np.linalg.multi_dot([term, sp.linalg.inv(center), term.T])
return np.linalg.multi_dot([X_shift, D_delta.T, inverted, (D - Y)]) # type: ignore
return np.linalg.multi_dot( # type: ignore
[X_shift, D_delta.T, inverted, (D - Y)]
)

# A diagonal covariance matrix was given as a 1D array.
# Same computation as above, but exploit the diagonal structure
Expand All @@ -376,7 +383,9 @@ def inversion_exact_subspace_woodbury(
inverted = np.diag(C_D_inv) - np.linalg.multi_dot(
[UT_D.T, sp.linalg.inv(center), UT_D]
)
return np.linalg.multi_dot([X_shift, D_delta.T, inverted, (D - Y)]) # type: ignore
return np.linalg.multi_dot( # type: ignore
[X_shift, D_delta.T, inverted, (D - Y)]
)


def inversion_subspace(
@@ -461,7 +470,9 @@ def inversion_subspace(

# Compute C_MD = center(X) @ center(Y).T / (num_ensemble - 1)
X_shift = X - np.mean(X, axis=1, keepdims=True)
return np.linalg.multi_dot([X_shift, D_delta.T, (term / (1 + T)), term.T, (D - Y)]) # type: ignore
return np.linalg.multi_dot( # type: ignore
[X_shift, D_delta.T, (term / (1 + T)), term.T, (D - Y)]
)


def inversion_rescaled_subspace(
@@ -520,7 +531,9 @@ def inversion_rescaled_subspace(

# Compute C_MD
X_shift = X - np.mean(X, axis=1, keepdims=True)
return np.linalg.multi_dot([X_shift, D_delta.T, (term * diag), term.T, (D - Y)]) # type: ignore
return np.linalg.multi_dot( # type: ignore
[X_shift, D_delta.T, (term * diag), term.T, (D - Y)]
)


if __name__ == "__main__":
11 changes: 4 additions & 7 deletions src/iterative_ensemble_smoother/experimental.py
@@ -4,13 +4,10 @@
"""
import numpy as np

rng = np.random.default_rng()

from iterative_ensemble_smoother.utils import (
covariance_to_correlation,
)

from iterative_ensemble_smoother.ies import create_coefficient_matrix
from iterative_ensemble_smoother.utils import covariance_to_correlation

rng = np.random.default_rng()


def ensemble_smoother_update_step_row_scaling(
@@ -56,7 +53,7 @@ def ensemble_smoother_update_step_row_scaling(
np.zeros((ensemble_size, ensemble_size)),
1.0,
)
I = np.identity(ensemble_size)
I = np.identity(ensemble_size) # noqa: E741
transition_matrix = I + W / np.sqrt(ensemble_size - 1)
row_scale.multiply(A, transition_matrix)
return A_with_row_scaling
