From 96f67f6bc8eceb6c6d82d63f892683524cbb7779 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 25 May 2024 17:42:11 +0200
Subject: [PATCH 01/62] pkg: made `.venv` in `.gitignore` more general

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index bdd5bb4..7eb4c23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,7 +83,7 @@ celerybeat-schedule
 .env
 
 # virtualenv
-.venv
+.venv*
 venv/
 ENV/
 

From 3deb05ca115a11e1d7ad419a9e3b7223a6ea680d Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Fri, 7 Jun 2024 21:40:00 +0200
Subject: [PATCH 02/62] wip: [11] refactored algorithm 1 to handle multiple
 right-hand sides

---
 src/pentapy/core.py    |   7 ++
 src/pentapy/solver.pxd |   2 +-
 src/pentapy/solver.pyx | 272 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 237 insertions(+), 44 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 067189d..9a55780 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -77,7 +77,14 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
         else:
             mat_flat = create_banded(mat, col_wise=False, dtype=np.double)
         rhs = np.asarray(rhs, dtype=np.double)
+        single_rhs = rhs.ndim == 1
+        if single_rhs:
+            rhs = rhs[:, np.newaxis]
+
         try:
+            if single_rhs:
+                return penta_solver1(mat_flat, rhs).ravel()
+
             return penta_solver1(mat_flat, rhs)
         except ZeroDivisionError:
             warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index e7c471e..05d249f 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -1,4 +1,4 @@
 # cython: language_level=3
-cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs)
+cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs)
 
 cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs)
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 469b074..c59226b 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -1,14 +1,23 @@
-# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True
+# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=True
+
 """
 This is a solver linear equation systems with a penta-diagonal matrix,
-implemented in cython.
+implemented in Cython.
+
 """
+
+### Imports ###
+
 import numpy as np
 
 cimport numpy as np
+from libc.stdint cimport int64_t, uint64_t
 
 
-def penta_solver1(double[:, :] mat_flat, double[:] rhs):
+### Main Python Interface ###
+
+
+def penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
     return np.asarray(c_penta_solver1(mat_flat, rhs))
 
 
@@ -16,56 +25,233 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs):
     return np.asarray(c_penta_solver2(mat_flat, rhs))
 
 
-cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs):
-
-    cdef int mat_j = mat_flat.shape[1]
-
-    cdef double[:] result = np.zeros(mat_j)
+### Solver Algorithm 1 ###
 
-    cdef double[:] al = np.zeros(mat_j)
-    cdef double[:] be = np.zeros(mat_j)
-    cdef double[:] ze = np.zeros(mat_j)
-    cdef double[:] ga = np.zeros(mat_j)
-    cdef double[:] mu = np.zeros(mat_j)
 
-    cdef int i
+cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
+    """
+    Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
+    the right-hand side ``b`` by
 
-    mu[0] = mat_flat[2, 0]
-    al[0] = mat_flat[1, 0] / mu[0]
-    be[0] = mat_flat[0, 0] / mu[0]
-    ze[0] = rhs[0] / mu[0]
+    - factorizing the matrix ``A`` into auxiliary coefficients and a unit upper
+        triangular matrix ``U``
+    - transforming the right-hand side into a vector ``zeta``
+    - solving the system of equations ``Ux = zeta`` by backward substitution
 
-    ga[1] = mat_flat[3, 1]
-    mu[1] = mat_flat[2, 1] - al[0] * ga[1]
-    al[1] = (mat_flat[1, 1] - be[0] * ga[1]) / mu[1]
-    be[1] = mat_flat[0, 1] / mu[1]
-    ze[1] = (rhs[1] - ze[0] * ga[1]) / mu[1]
+    """
 
-    for i in range(2, mat_j-2):
-        ga[i] = mat_flat[3, i] - al[i-2] * mat_flat[4, i]
-        mu[i] = mat_flat[2, i] - be[i-2] * mat_flat[4, i] - al[i-1] * ga[i]
-        al[i] = (mat_flat[1, i] - be[i-1] * ga[i]) / mu[i]
-        be[i] = mat_flat[0, i] / mu[i]
-        ze[i] = (rhs[i] - ze[i-2] * mat_flat[4, i] - ze[i-1] * ga[i]) / mu[i]
+    cdef uint64_t mat_n_rows = mat_flat.shape[1]
+    cdef uint64_t rhs_n_cols = rhs.shape[1]
+    cdef uint64_t iter_col
+    cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols))
+    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5))
 
-    ga[mat_j-2] = mat_flat[3, mat_j-2] - al[mat_j-4] * mat_flat[4, mat_j-2]
-    mu[mat_j-2] = mat_flat[2, mat_j-2] - be[mat_j-4] * mat_flat[4, mat_j-2] - al[mat_j-3] * ga[mat_j-2]
-    al[mat_j-2] = (mat_flat[1, mat_j-2] - be[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2]
+    # first, the matrix is factorized
+    c_penta_factorize_algo1(
+        mat_flat,
+        mat_n_rows,
+        mat_factorized,
+    )
 
-    ga[mat_j-1] = mat_flat[3, mat_j-1] - al[mat_j-3] * mat_flat[4, mat_j-1]
-    mu[mat_j-1] = mat_flat[2, mat_j-1] - be[mat_j-3] * mat_flat[4, mat_j-1] - al[mat_j-2] * ga[mat_j-1]
+    # then, all the right-hand sides are solved
+    for iter_col in range(rhs_n_cols):
+        c_solve_penta_from_factorize_algo_1(
+            mat_n_rows,
+            mat_factorized,
+            rhs[::, iter_col],
+            result[::, iter_col],
+        )
 
-    ze[mat_j-2] = (rhs[mat_j-2] - ze[mat_j-4] * mat_flat[4, mat_j-2] - ze[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2]
-    ze[mat_j-1] = (rhs[mat_j-1] - ze[mat_j-3] * mat_flat[4, mat_j-1] - ze[mat_j-2] * ga[mat_j-1]) / mu[mat_j-1]
-
-    # Backward substitution
-    result[mat_j-1] = ze[mat_j-1]
-    result[mat_j-2] = ze[mat_j-2] - al[mat_j-2] * result[mat_j-1]
+    return result
 
-    for i in range(mat_j-3, -1, -1):
-        result[i] = ze[i] - al[i] * result[i+1] - be[i] * result[i+2]
 
-    return result
+cdef void c_penta_factorize_algo1(
+    double[:, :] mat_flat,
+    uint64_t mat_n_rows,
+    double[::, ::1] mat_factorized,
+):
+    """
+    Factorizes the pentadiagonal matrix ``A`` into
+
+    - auxiliary coefficients ``e``, ``mu`` and ``gamma`` for the transformation of the
+        right-hand side
+    - a unit upper triangular matrix with the main diagonals ``alpha`` and ``beta``
+        for the following backward substitution. Its unit main diagonal is implicit.
+
+    They are overwriting the memoryview ``mat_factorized`` as follows:
+
+    ```bash
+    [[  *           mu_0        *           al_0        be_0  ]
+     [  *           mu_1        ga_1        al_1        be_1  ]
+     [  e_2         mu_2        ga_2        al_2        be_2  ]
+                                ...
+     [  e_i         mu_i        ga_i        al_i        be_i  ]
+                                ...
+     [  e_{n-2}     mu_{n-2}    ga_{n-2}    al_{n-2}    *     ]
+     [  e_{n-1}     mu_{n-1}    ga_{n-1}    *           *     ]]
+    ```
+
+    where the entries marked with ``*`` are not used by design, but overwritten with
+    zeros.
+
+    """
+
+    ### Variable declarations ###
+
+    cdef uint64_t iter_row
+    cdef double mu_i, ga_i, e_i
+    cdef double al_i, al_i_minus_1, al_i_plus_1
+
+    ### Factorization ###
+
+    # First row
+    mu_i = mat_flat[2, 0]
+    al_i_minus_1 = mat_flat[1, 0] / mu_i
+    be_i_minus_1 = mat_flat[0, 0] / mu_i
+
+    mat_factorized[0, 0] = 0.0
+    mat_factorized[0, 1] = mu_i
+    mat_factorized[0, 2] = 0.0
+    mat_factorized[0, 3] = al_i_minus_1
+    mat_factorized[0, 4] = be_i_minus_1
+
+    # Second row
+    ga_i = mat_flat[3, 1]
+    mu_i = mat_flat[2, 1] - al_i_minus_1 * ga_i
+    al_i = (mat_flat[1, 1] - be_i_minus_1 * ga_i) / mu_i
+    be_i = mat_flat[0, 1] / mu_i
+
+    mat_factorized[1, 0] = 0.0
+    mat_factorized[1, 1] = mu_i
+    mat_factorized[1, 2] = ga_i
+    mat_factorized[1, 3] = al_i
+    mat_factorized[1, 4] = be_i
+
+    # Central rows
+    for iter_row in range(2, mat_n_rows-2):
+        e_i = mat_flat[4, iter_row]
+        ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i
+        mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i
+
+        al_i_plus_1 = (mat_flat[1, iter_row] - be_i * ga_i) / mu_i
+        al_i_minus_1 = al_i
+        al_i = al_i_plus_1
+
+        be_i_plus_1 = mat_flat[0, iter_row] / mu_i
+        be_i_minus_1 = be_i
+        be_i = be_i_plus_1
+
+        mat_factorized[iter_row, 0] = e_i
+        mat_factorized[iter_row, 1] = mu_i
+        mat_factorized[iter_row, 2] = ga_i
+        mat_factorized[iter_row, 3] = al_i
+        mat_factorized[iter_row, 4] = be_i
+
+    # Second to last row
+    e_i = mat_flat[4, mat_n_rows-2]
+    ga_i = mat_flat[3, mat_n_rows-2] - al_i_minus_1 * e_i
+    mu_i = mat_flat[2, mat_n_rows-2] - be_i_minus_1 * e_i - al_i * ga_i
+    al_i_plus_1 = (mat_flat[1, mat_n_rows-2] - be_i * ga_i) / mu_i
+
+    mat_factorized[mat_n_rows-2, 0] = e_i
+    mat_factorized[mat_n_rows-2, 1] = mu_i
+    mat_factorized[mat_n_rows-2, 2] = ga_i
+    mat_factorized[mat_n_rows-2, 3] = al_i_plus_1
+    mat_factorized[mat_n_rows-2, 4] = 0.0
+
+    # Last Row
+    e_i = mat_flat[4, mat_n_rows-1]
+    ga_i = mat_flat[3, mat_n_rows-1] - al_i * e_i
+    mu_i = mat_flat[2, mat_n_rows-1] - be_i * e_i - al_i_plus_1 * ga_i
+
+    mat_factorized[mat_n_rows-1, 0] = e_i
+    mat_factorized[mat_n_rows-1, 1] = mu_i
+    mat_factorized[mat_n_rows-1, 2] = ga_i
+    mat_factorized[mat_n_rows-1, 3] = 0.0
+    mat_factorized[mat_n_rows-1, 4] = 0.0
+
+    return
+
+
+cdef void c_solve_penta_from_factorize_algo_1(
+    uint64_t mat_n_rows,
+    double[::, ::1] mat_factorized,
+    double[::] rhs_single,
+    double[::] result_view,
+):
+    """
+    Solves the pentadiagonal system of equations ``Ax = b`` with the factorized
+    unit upper triangular matrix ``U`` and the right-hand side ``b``.
+    It overwrites the right-hand side ``b`` first with the transformed vector ``zeta``
+    and then with the solution vector ``x`` for ``Ux = zeta``.
+
+    """
+
+    ### Variable declarations ###
+
+    cdef int64_t iter_row
+    cdef double ze_i, ze_i_minus_1, ze_i_plus_1
+
+    ### Transformation ###
+
+    # first, the right-hand side is transformed into the vector ``zeta``
+    # First row
+
+    ze_i_minus_1 = rhs_single[0] / mat_factorized[0, 1]
+    result_view[0] = ze_i_minus_1
+
+    # Second row
+    ze_i = (rhs_single[1] - ze_i_minus_1 * mat_factorized[1, 2]) / mat_factorized[1, 1]
+    result_view[1] = ze_i
+
+    # Central rows
+    for iter_row in range(2, mat_n_rows-2):
+        ze_i_plus_1 = (
+            rhs_single[iter_row]
+            - ze_i_minus_1 * mat_factorized[iter_row, 0]
+            - ze_i * mat_factorized[iter_row, 2]
+        ) / mat_factorized[iter_row, 1]
+        ze_i_minus_1 = ze_i
+        ze_i = ze_i_plus_1
+        result_view[iter_row] = ze_i_plus_1
+
+    # Second to last row
+    ze_i_plus_1 = (
+        rhs_single[mat_n_rows-2]
+        - ze_i_minus_1 * mat_factorized[mat_n_rows-2, 0]
+        - ze_i * mat_factorized[mat_n_rows-2, 2]
+    ) / mat_factorized[mat_n_rows-2, 1]
+    ze_i_minus_1 = ze_i
+    ze_i = ze_i_plus_1
+    result_view[mat_n_rows-2] = ze_i_plus_1
+
+    # Last row
+    ze_i_plus_1 = (
+        rhs_single[mat_n_rows-1]
+        - ze_i_minus_1 * mat_factorized[mat_n_rows-1, 0]
+        - ze_i * mat_factorized[mat_n_rows-1, 2]
+    ) / mat_factorized[mat_n_rows-1, 1]
+    result_view[mat_n_rows-1] = ze_i_plus_1
+
+    ### Backward substitution ###
+
+    # The solution vector is calculated by backward substitution that overwrites the
+    # right-hand side vector with the solution vector
+    ze_i -= mat_factorized[mat_n_rows-2, 3] * ze_i_plus_1
+    result_view[mat_n_rows-2] = ze_i
+
+    for iter_row in range(mat_n_rows-3, -1, -1):
+        result_view[iter_row] -= (
+            mat_factorized[iter_row, 3] * ze_i
+            + mat_factorized[iter_row, 4] * ze_i_plus_1
+        )
+        ze_i_plus_1 = ze_i
+        ze_i = result_view[iter_row]
+
+    return
+
+
+### Solver Algorithm 2 ###
 
 
 cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs):

From 328d9c90492ec4dd10a3a62f50917547c63082eb Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Fri, 7 Jun 2024 21:40:29 +0200
Subject: [PATCH 03/62] feat: [11] added doctest runs to `pytest`

---
 pytest.ini | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 pytest.ini

diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..2bed0f3
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --doctest-modules
\ No newline at end of file

From 90bdeb143f8677427d4255e332a00d83b51a40cf Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Fri, 7 Jun 2024 22:33:59 +0200
Subject: [PATCH 04/62] tests: [11] replaced single tools test by scalable
 parametrized tools tests that cover more cases

---
 tests/test_tools.py | 205 ++++++++++++++++++++++++++++++++++++++++++++
 tests/util_funcs.py | 175 +++++++++++++++++++++++++++++++++++++
 2 files changed, 380 insertions(+)
 create mode 100644 tests/test_tools.py
 create mode 100644 tests/util_funcs.py

diff --git a/tests/test_tools.py b/tests/test_tools.py
new file mode 100644
index 0000000..879a713
--- /dev/null
+++ b/tests/test_tools.py
@@ -0,0 +1,205 @@
+"""
+This test suite implements the test for the ``tools`` module of the ``pentapy`` package.
+
+"""
+
+### Imports ###
+
+import warnings
+from typing import Optional, Tuple, Type
+
+import numpy as np
+import pentapy as pp
+import pytest
+import util_funcs as uf
+from pentapy.tools import _check_penta
+
+warnings.simplefilter("always")
+
+### Constants ###
+
+SEED = 19_031_977
+N_ROWS = [
+    3,
+    4,
+    5,
+    10,
+    11,
+    25,
+    26,
+    50,
+    51,
+    100,
+    101,
+    250,
+    251,
+    500,
+    501,
+    1_000,
+    1_001,
+    10_000,
+    10_001,
+]
+
+### Tests ###
+
+
+@pytest.mark.parametrize("offset", [0, 1, 2, -1, -2])
+@pytest.mark.parametrize("n_rows", N_ROWS)
+def test_diag_indices(n_rows: int, offset: int) -> None:
+    """
+    Tests the generation of the diagonal indices via the function
+    ``pentapy.diag_indices``.
+
+    """
+
+    # the diagonal indices are obtained with NumPy and pentapy
+    row_idxs_ref, col_idxs_ref = uf.get_diag_indices(n=n_rows, offset=offset)
+    row_idxs, col_idxs = pp.diag_indices(n=n_rows, offset=offset)
+
+    # the diagonal indices are compared
+    assert np.array_equal(row_idxs_ref, row_idxs)
+    assert np.array_equal(col_idxs_ref, col_idxs)
+
+
+@pytest.mark.parametrize("copy", [True, False])
+@pytest.mark.parametrize("with_shift", [True, False])
+@pytest.mark.parametrize("col_wise", [True, False])
+@pytest.mark.parametrize("n_rows", N_ROWS)
+def test_penta_generators(
+    n_rows: int,
+    col_wise: bool,
+    with_shift: bool,
+    copy: bool,
+) -> None:
+    """
+    Tests the generation of pentadiagonal matrices where the matrix.
+
+    """
+
+    # a reference matrix is initialised
+    mat_full_ref = uf.gen_rand_penta_matrix_dense_int(
+        n_rows=n_rows,
+        seed=SEED,
+        with_pentapy_indices=False,
+    )
+
+    # then, it is turned into a banded matrix ...
+    mat_banded = pp.create_banded(mat_full_ref, col_wise=col_wise)
+
+    # ... which is maybe shifted
+    # Case 1: copied shift
+    if with_shift and copy:
+        mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=True)
+        col_wise = not col_wise
+
+    # Case 2: in-place shift
+    if with_shift and not copy:
+        mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=False)
+        col_wise = not col_wise
+
+    # ... from which a full matrix is created again
+    mat_full = pp.create_full(mat_banded, col_wise=col_wise)
+
+    # the matrices are compared
+    assert np.array_equal(mat_full_ref, mat_full)
+
+
+@pytest.mark.parametrize(
+    "shape, exception",
+    [
+        ((5, 5), None),  # Valid 2D Array with 5 rows and 5 rows
+        ((5, 2), ValueError),  # 2D Array with 5 rows but only 2 columns
+        ((2, 5), ValueError),  # 2D Array with 2 rows but 5 columns
+        ((5,), ValueError),  # 1D Array
+    ],
+)
+def test_create_banded_raises(
+    shape: Tuple[int, ...],
+    exception: Optional[Type[Exception]],
+) -> None:
+    """
+    Test if the function ``pentapy.create_banded`` raises the expected exceptions.
+
+    """
+
+    # the test matrix is initialised
+    np.random.seed(SEED)
+    mat = np.random.rand(*shape)
+
+    # Case 1: no exception should be raised
+    if exception is None:
+        pp.create_banded(mat)
+        return
+
+    # Case 2: an exception should be raised
+    with pytest.raises(exception):
+        pp.create_banded(mat)
+
+
+@pytest.mark.parametrize(
+    "shape, exception",
+    [
+        ((5, 5), None),  # Valid 2D Array with 5 bands and 5 columns
+        ((5, 10), None),  # Valid 2D Array with 5 bands and 10 columns
+        ((5, 3), None),  # 2D Array with 5 bands and the minimum number of columns
+        ((6, 20), ValueError),  # 2D Array does not have 5 bands
+        ((4, 30), ValueError),  # 2D Array does not have 5 bands
+        ((5, 1), ValueError),  # 2D Array with 5 bands but too little columns
+        ((5, 2), ValueError),  # 2D Array with 5 bands but too little columns
+        ((5,), ValueError),  # 1D Array
+    ],
+)
+def test_create_full_raises(
+    shape: Tuple[int, ...],
+    exception: Optional[Type[Exception]],
+) -> None:
+    """
+    Test if the function ``pentapy.create_full`` raises the expected exceptions.
+
+    """
+
+    # the test matrix is initialised
+    np.random.seed(SEED)
+    mat = np.random.rand(*shape)
+
+    # Case 1: no exception should be raised
+    if exception is None:
+        pp.create_full(mat)
+        return
+
+    # Case 2: an exception should be raised
+    with pytest.raises(exception):
+        pp.create_full(mat)
+
+
+@pytest.mark.parametrize(
+    "shape, exception",
+    [
+        ((5, 3), None),  # Valid 2D Array with 5 bands and 3 rows
+        ((5, 2), ValueError),  # 2D Array with 5 bands but less than 3 rows
+        ((4, 3), ValueError),  # 2D Array with less than 5 bands
+        ((5,), ValueError),  # 1D Array
+    ],
+)
+def test_check_penta(
+    shape: Tuple[int, ...],
+    exception: Optional[Type[Exception]],
+) -> None:
+    """
+    Test if the function ``pentapy.tools._check_penta`` raises the expected exceptions.
+
+    """
+
+    # the test matrix is initialised
+    np.random.seed(SEED)
+    mat = np.random.rand(*shape)
+
+    # Case 1: no exception should be raised
+    if exception is None:
+        _check_penta(mat)
+        return
+
+    # Case 2: an exception should be raised
+    with pytest.raises(exception):
+        _check_penta(mat)
diff --git a/tests/util_funcs.py b/tests/util_funcs.py
new file mode 100644
index 0000000..2402d25
--- /dev/null
+++ b/tests/util_funcs.py
@@ -0,0 +1,175 @@
+"""
+This test suite implements the utility functions for testing the ``pentapy`` package.
+
+"""
+
+### Imports ###
+
+from functools import partial
+from typing import Tuple
+
+import numpy as np
+import pentapy as pp
+
+### Utility Functions ###
+
+
+def get_diag_indices(
+    n: int,
+    offset: int,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Computes the row and column indices of the diagonal of a matrix ``mat``.
+
+    This answer is based on the Stack Overflow answer that can be found at:
+    https://stackoverflow.com/a/18081653/14814813
+
+    Doctests
+    --------
+    >>> # Setting up a test matrix
+    >>> n_rows = 5
+    >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows)
+
+    >>> # Getting the main diagonal indices
+    >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=0)
+    >>> row_idxs
+    array([0, 1, 2, 3, 4])
+    >>> col_idxs
+    array([0, 1, 2, 3, 4])
+    >>> mat[row_idxs, col_idxs]
+    array([ 0,  6, 12, 18, 24])
+
+    >>> # Getting the first upper diagonal indices
+    >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=1)
+    >>> row_idxs
+    array([0, 1, 2, 3])
+    >>> col_idxs
+    array([1, 2, 3, 4])
+    >>> mat[row_idxs, col_idxs]
+    array([ 1,  7, 13, 19])
+
+    >>> # Getting the second upper diagonal indices
+    >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=2)
+    >>> row_idxs
+    array([0, 1, 2])
+    >>> col_idxs
+    array([2, 3, 4])
+    >>> mat[row_idxs, col_idxs]
+    array([ 2,  8, 14])
+
+    >>> # Getting the first lower diagonal indices
+    >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-1)
+    >>> row_idxs
+    array([1, 2, 3, 4])
+    >>> col_idxs
+    array([0, 1, 2, 3])
+    >>> mat[row_idxs, col_idxs]
+    array([ 5, 11, 17, 23])
+
+    >>> # Getting the second lower diagonal indices
+    >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-2)
+    >>> row_idxs
+    array([2, 3, 4])
+    >>> col_idxs
+    array([0, 1, 2])
+    >>> mat[row_idxs, col_idxs]
+    array([10, 16, 22])
+
+    """
+
+    row_idxs, col_idxs = np.diag_indices(n=n, ndim=2)
+    if offset < 0:
+        row_idx_from = -offset
+        row_idx_to = None
+        col_idx_from = 0
+        col_idx_to = offset
+    elif offset > 0:
+        row_idx_from = 0
+        row_idx_to = -offset
+        col_idx_from = offset
+        col_idx_to = None
+    else:
+        row_idx_from = None
+        row_idx_to = None
+        col_idx_from = None
+        col_idx_to = None
+
+    return (
+        row_idxs[row_idx_from:row_idx_to],
+        col_idxs[col_idx_from:col_idx_to],
+    )
+
+
+def gen_rand_penta_matrix_dense_int(
+    n_rows: int,
+    seed: int,
+    with_pentapy_indices: bool,
+) -> np.ndarray:
+    """
+    Generates a random dense pentadiagonal matrix with shape ``(n_rows, n_rows)`` and
+    data type ``int64``.
+
+    Doctests
+    --------
+    >>> # Generating a random pentadiagonal matrix with NumPy indices
+    >>> n_rows = 5
+    >>> seed = 19_031_977
+    >>> with_pentapy_indices = False
+
+    >>> mat_no_pentapy = gen_rand_penta_matrix_dense_int(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     with_pentapy_indices=with_pentapy_indices
+    ... )
+    >>> mat_no_pentapy
+    array([[117, 499,  43,   0,   0],
+           [378, 149, 857, 353,   0],
+           [285, 769, 767, 229, 484],
+           [  0, 717, 214, 243, 877],
+           [  0,   0, 410, 611,  79]], dtype=int64)
+
+    >>> # Generating a random pentadiagonal matrix with pentapy indices
+    >>> mat_with_pentapy = gen_rand_penta_matrix_dense_int(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     with_pentapy_indices=True
+    ... )
+    >>> mat_with_pentapy
+    array([[117, 499,  43,   0,   0],
+           [378, 149, 857, 353,   0],
+           [285, 769, 767, 229, 484],
+           [  0, 717, 214, 243, 877],
+           [  0,   0, 410, 611,  79]], dtype=int64)
+
+    >>> # Checking if the two matrices are equal
+    >>> np.array_equal(mat_no_pentapy, mat_with_pentapy)
+    True
+
+    """
+
+    # first, a matrix of zeros is initialised ...
+    mat = np.zeros((n_rows, n_rows), dtype=np.int64)
+    # ... together with a partially specified random vector generator
+    # NOTE: this ensures consistent random numbers for both cases
+    gen_rand_int = partial(np.random.randint, low=1, high=1_000)
+
+    # then, the diagonal index function is obtained
+    diag_idx_func = get_diag_indices
+    if with_pentapy_indices:
+        diag_idx_func = pp.diag_indices
+
+    # then, the diagonals are filled with random integers
+    np.random.seed(seed=seed)
+    for offset in range(-2, 3):
+        row_idxs, col_idxs = diag_idx_func(n=n_rows, offset=offset)
+        mat[row_idxs, col_idxs] = gen_rand_int(size=n_rows - abs(offset))
+
+    return mat
+
+
+### Doctests ###
+
+if __name__ == "__main__":  # pragma: no cover
+    import doctest
+
+    doctest.testmod()

From 50806411c1087036776576f7c4e68a05d0f3b6a6 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Fri, 7 Jun 2024 22:35:43 +0200
Subject: [PATCH 05/62] tests: [11] removed version from coverage

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 7b0aec6..4c400f8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,6 +103,7 @@ max-line-length = 120
         "*examples*",
         "*tests*",
         "*paper*",
+        "pentapy/src/pentapy/_version.py",
     ]
 
     [tool.coverage.report]

From 2f576092dfc0bb52e4ab268248e0673dc0eda667 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Fri, 7 Jun 2024 23:02:53 +0200
Subject: [PATCH 06/62] lint: [11] fixed block comment lint error

---
 src/pentapy/solver.pyx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index c59226b..bf075da 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -6,7 +6,7 @@ implemented in Cython.
 
 """
 
-### Imports ###
+# Imports
 
 import numpy as np
 
@@ -14,7 +14,7 @@ cimport numpy as np
 from libc.stdint cimport int64_t, uint64_t
 
 
-### Main Python Interface ###
+# Main Python Interface
 
 
 def penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
@@ -25,7 +25,7 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs):
     return np.asarray(c_penta_solver2(mat_flat, rhs))
 
 
-### Solver Algorithm 1 ###
+# Solver Algorithm 1
 
 
 cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
@@ -96,13 +96,13 @@ cdef void c_penta_factorize_algo1(
 
     """
 
-    ### Variable declarations ###
+    # Variable declarations
 
     cdef uint64_t iter_row
     cdef double mu_i, ga_i, e_i
     cdef double al_i, al_i_minus_1, al_i_plus_1
 
-    ### Factorization ###
+    # Factorization
 
     # First row
     mu_i = mat_flat[2, 0]
@@ -187,12 +187,12 @@ cdef void c_solve_penta_from_factorize_algo_1(
 
     """
 
-    ### Variable declarations ###
+    # Variable declarations
 
     cdef int64_t iter_row
     cdef double ze_i, ze_i_minus_1, ze_i_plus_1
 
-    ### Transformation ###
+    # Transformation
 
     # first, the right-hand side is transformed into the vector ``zeta``
     # First row
@@ -233,7 +233,7 @@ cdef void c_solve_penta_from_factorize_algo_1(
     ) / mat_factorized[mat_n_rows-1, 1]
     result_view[mat_n_rows-1] = ze_i_plus_1
 
-    ### Backward substitution ###
+    # Backward substitution
 
     # The solution vector is calculated by backward substitution that overwrites the
     # right-hand side vector with the solution vector
@@ -251,7 +251,7 @@ cdef void c_solve_penta_from_factorize_algo_1(
     return
 
 
-### Solver Algorithm 2 ###
+# Solver Algorithm 2
 
 
 cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs):

From 8db6ba4841b697725a60ba7d205b7db59f4b3d79 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 10:34:23 +0200
Subject: [PATCH 07/62] style: [11] improved headlines

---
 src/pentapy/solver.pyx | 16 ++++++++--------
 tests/test_tools.py    |  6 +++---
 tests/util_funcs.py    | 10 +++++-----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index bf075da..c8a22a2 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -14,7 +14,7 @@ cimport numpy as np
 from libc.stdint cimport int64_t, uint64_t
 
 
-# Main Python Interface
+# === Main Python Interface ===
 
 
 def penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
@@ -25,7 +25,7 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs):
     return np.asarray(c_penta_solver2(mat_flat, rhs))
 
 
-# Solver Algorithm 1
+# === Solver Algorithm 1 ===
 
 
 cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
@@ -96,13 +96,13 @@ cdef void c_penta_factorize_algo1(
 
     """
 
-    # Variable declarations
+    # === Variable declarations ===
 
     cdef uint64_t iter_row
     cdef double mu_i, ga_i, e_i
     cdef double al_i, al_i_minus_1, al_i_plus_1
 
-    # Factorization
+    # === Factorization ===
 
     # First row
     mu_i = mat_flat[2, 0]
@@ -187,12 +187,12 @@ cdef void c_solve_penta_from_factorize_algo_1(
 
     """
 
-    # Variable declarations
+    # === Variable declarations ===
 
     cdef int64_t iter_row
     cdef double ze_i, ze_i_minus_1, ze_i_plus_1
 
-    # Transformation
+    # === Transformation ===
 
     # first, the right-hand side is transformed into the vector ``zeta``
     # First row
@@ -233,7 +233,7 @@ cdef void c_solve_penta_from_factorize_algo_1(
     ) / mat_factorized[mat_n_rows-1, 1]
     result_view[mat_n_rows-1] = ze_i_plus_1
 
-    # Backward substitution
+    # === Backward substitution ===
 
     # The solution vector is calculated by backward substitution that overwrites the
     # right-hand side vector with the solution vector
@@ -251,7 +251,7 @@ cdef void c_solve_penta_from_factorize_algo_1(
     return
 
 
-# Solver Algorithm 2
+# === Solver Algorithm 2 ===
 
 
 cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs):
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 879a713..2f54c48 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -3,7 +3,7 @@
 
 """
 
-### Imports ###
+# === Imports ===
 
 import warnings
 from typing import Optional, Tuple, Type
@@ -16,7 +16,7 @@
 
 warnings.simplefilter("always")
 
-### Constants ###
+# === Constants ===
 
 SEED = 19_031_977
 N_ROWS = [
@@ -41,7 +41,7 @@
     10_001,
 ]
 
-### Tests ###
+# === Tests ===
 
 
 @pytest.mark.parametrize("offset", [0, 1, 2, -1, -2])
diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index 2402d25..594e75d 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -3,7 +3,7 @@
 
 """
 
-### Imports ###
+# === Imports ===
 
 from functools import partial
 from typing import Tuple
@@ -11,7 +11,7 @@
 import numpy as np
 import pentapy as pp
 
-### Utility Functions ###
+# === Utility Functions ===
 
 
 def get_diag_indices(
@@ -25,7 +25,7 @@ def get_diag_indices(
     https://stackoverflow.com/a/18081653/14814813
 
     Doctests
-    --------
+    ======--
     >>> # Setting up a test matrix
     >>> n_rows = 5
     >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows)
@@ -110,7 +110,7 @@ def gen_rand_penta_matrix_dense_int(
     data type ``int64``.
 
     Doctests
-    --------
+    ======--
     >>> # Generating a random pentadiagonal matrix with NumPy indices
     >>> n_rows = 5
     >>> seed = 19_031_977
@@ -167,7 +167,7 @@ def gen_rand_penta_matrix_dense_int(
     return mat
 
 
-### Doctests ###
+# === Doctests ===
 
 if __name__ == "__main__":  # pragma: no cover
     import doctest

From 1cdd525f8a93611e7c369253b724572388841819 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 10:35:27 +0200
Subject: [PATCH 08/62] wip: [11] formatted

---
 tests/util_funcs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index 594e75d..040bd04 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -9,6 +9,7 @@
 from typing import Tuple
 
 import numpy as np
+
 import pentapy as pp
 
 # === Utility Functions ===

From ae656cf94f38b4651545161e848443a97930b53f Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 11:13:44 +0200
Subject: [PATCH 09/62] test: [11] created conditioned banded matrix creator
 for testing purposes

---
 tests/util_funcs.py | 199 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 197 insertions(+), 2 deletions(-)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index 040bd04..ca8f2be 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -9,9 +9,15 @@
 from typing import Tuple
 
 import numpy as np
+from scipy import sparse as sprs
 
 import pentapy as pp
 
+# === Constants ===
+
+_MIN_DIAG_VAL = 1e-3
+
+
 # === Utility Functions ===
 
 
@@ -26,7 +32,7 @@ def get_diag_indices(
     https://stackoverflow.com/a/18081653/14814813
 
     Doctests
-    ======--
+    --------
     >>> # Setting up a test matrix
     >>> n_rows = 5
     >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows)
@@ -111,7 +117,7 @@ def gen_rand_penta_matrix_dense_int(
     data type ``int64``.
 
     Doctests
-    ======--
+    --------
     >>> # Generating a random pentadiagonal matrix with NumPy indices
     >>> n_rows = 5
     >>> seed = 19_031_977
@@ -168,6 +174,195 @@ def gen_rand_penta_matrix_dense_int(
     return mat
 
 
+def gen_conditioned_rand_penta_matrix_dense(
+    n_rows: int,
+    seed: int,
+    ill_conditioned: bool,
+) -> np.ndarray:
+    """
+    Generates a well- or ill-conditioned random banded pentadiagonal matrix with shape
+    ``(n_rows, n_rows)``.
+
+    This is achieved as follows:
+    - a fake LDU decomposition is generated where ``L`` and ``U`` are unit lower and
+      upper triangular matrices, respectively, and ``D`` is a diagonal matrix
+    - the matrix is then reconstructed by multiplying the three matrices and converting
+      the result to a banded matrix
+
+    If ``D`` does not have any zeros or values of small magnitude compared to the
+    largest value, the matrix should be well-conditioned.
+    Otherwise, it is ill-conditioned.
+
+    Doctests
+    --------
+    >>> # Imports
+    >>> from scipy.linalg import bandwidth
+
+    >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix
+    >>> n_rows = 3
+    >>> seed = 19_031_977
+
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=False,
+    ... )
+    >>> mat
+    array([[ 0.92453713,  0.28308514, -0.09972199],
+           [-0.09784268,  0.2270634 , -0.1509019 ],
+           [-0.23431267,  0.00468463,  0.22991003]])
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and values below 1e10 can be considered good
+    >>> np.linalg.cond(mat)
+    4.976880305142543
+
+    >>> # 2) Generating a super small ill-conditioned random pentadiagonal matrix
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=True,
+    ... )
+    >>> mat
+    array([[ 0.92453713,  0.28308514, -0.09972199],
+           [-0.09784268,  0.2270634 , -0.1509019 ],
+           [-0.23431267,  0.00468463, -0.02273771]])
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and its value should be close to the
+    >>> # reciprocal floating point precision, i.e., ~1e16
+    >>> np.linalg.cond(mat)
+    1.493156437173682e+17
+
+    >>> # 3) Generating a small well-conditioned random pentadiagonal matrix
+    >>> n_rows = 7
+
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=False,
+    ... )
+    >>> np.round(mat, 2)
+    array([[ 0.92, -0.72,  0.73,  0.  ,  0.  ,  0.  ,  0.  ],
+           [ 0.83, -0.02,  1.08,  0.41,  0.  ,  0.  ,  0.  ],
+           [-0.58,  0.13, -0.13, -0.37,  0.18,  0.  ,  0.  ],
+           [ 0.  , -0.07, -0.58,  0.46, -0.31,  0.28,  0.  ],
+           [ 0.  ,  0.  ,  0.43,  0.13,  0.39, -0.1 , -0.15],
+           [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
+           [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.53]])
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and values below 1e10 can be considered good
+    >>> np.linalg.cond(mat)
+    42.4847446467131
+
+    >>> # 4) Generating a small ill-conditioned random pentadiagonal matrix
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=True,
+    ... )
+    >>> np.round(mat, 2)
+    array([[ 0.92, -0.72,  0.73,  0.  ,  0.  ,  0.  ,  0.  ],
+           [ 0.83, -0.02,  1.08,  0.41,  0.  ,  0.  ,  0.  ],
+           [-0.58,  0.13, -0.13, -0.37,  0.18,  0.  ,  0.  ],
+           [ 0.  , -0.07, -0.58,  0.46, -0.31,  0.28,  0.  ],
+           [ 0.  ,  0.  ,  0.43,  0.13,  0.39, -0.1 , -0.15],
+           [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
+           [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.28]])
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and its value should be close to the
+    >>> # reciprocal floating point precision, i.e., ~1e16
+    >>> np.linalg.cond(mat)
+    1.1079218802103074e+17
+
+    >>> # 5) Generating a large well-conditioned random pentadiagonal matrix
+    >>> n_rows = 1_000
+
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=False,
+    ... )
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and values below 1e10 can be considered good
+    >>> np.linalg.cond(mat)
+    9570.995402466417
+
+    >>> # 6) Generating a large ill-conditioned random pentadiagonal matrix
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=True,
+    ... )
+    >>> # its bandwidth is computed and should be equal to 2
+    >>> bandwidth(mat)
+    (2, 2)
+    >>> # its condition number is computed and its value should be close to the
+    >>> # reciprocal floating point precision, i.e., ~1e16
+    >>> np.linalg.cond(mat)
+    5.058722571393928e+17
+
+    """
+
+    # first, the fake diagonal matrix is generated whose entries are strictly
+    # positive and sorted in descending order
+    np.random.seed(seed=seed)
+    d_diag = np.flip(np.sort(np.random.rand(n_rows)))
+
+    # the conditioning is achieved by manipulating the smallest diagonal entry
+    # Case 1: well-conditioned matrix
+    if not ill_conditioned:
+        # here, the smallest diagonal entry is set to a value that is enforced to have
+        # a minimum magnitude
+        d_diag = np.maximum(d_diag, _MIN_DIAG_VAL)
+
+    # Case 2: ill-conditioned matrix
+    else:
+        # here, the smallest diagonal entry is set to a value that is numerically zero
+        # compared to the largest entry
+        d_diag[n_rows - 1] = 0.1 * np.finfo(np.float64).eps * d_diag[0]
+
+    # ... followed by a unit lower triangular matrix with 2 sub-diagonals, but here
+    # the entries may be negative ...
+    diagonals = [
+        1.0 - 2.0 * np.random.rand(n_rows - 2),
+        1.0 - 2.0 * np.random.rand(n_rows - 1),
+        np.ones(n_rows),
+    ]
+    l_mat = sprs.diags(
+        diagonals=diagonals,
+        offsets=[-2, -1, 0],  # type: ignore
+        shape=(n_rows, n_rows),
+        format="csr",
+        dtype=np.float64,
+    )
+
+    # ... and an upper triangular matrix with 2 super-diagonals
+    diagonals = [
+        np.ones(n_rows),
+        1.0 - 2.0 * np.random.rand(n_rows - 1),
+        1.0 - 2.0 * np.random.rand(n_rows - 2),
+    ]
+    u_mat = sprs.diags(
+        diagonals=diagonals,
+        offsets=[0, 1, 2],  # type: ignore
+        shape=(n_rows, n_rows),
+        format="csr",
+        dtype=np.float64,
+    )
+
+    # finally, the matrix is reconstructed by multiplying the three matrices
+    return (l_mat.multiply(d_diag[np.newaxis, ::]).dot(u_mat)).toarray()
+
+
 # === Doctests ===
 
 if __name__ == "__main__":  # pragma: no cover

From 26752f292f569e96b0a7b39d5a4fb95e695b6f72 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 11:44:29 +0200
Subject: [PATCH 10/62] tests: [11] add doctested reference solver; made
 ill-conditioning more severe

---
 tests/util_funcs.py | 85 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 72 insertions(+), 13 deletions(-)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index ca8f2be..8ef8efa 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -9,6 +9,7 @@
 from typing import Tuple
 
 import numpy as np
+from scipy import linalg as spla
 from scipy import sparse as sprs
 
 import pentapy as pp
@@ -195,9 +196,6 @@ def gen_conditioned_rand_penta_matrix_dense(
 
     Doctests
     --------
-    >>> # Imports
-    >>> from scipy.linalg import bandwidth
-
     >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix
     >>> n_rows = 3
     >>> seed = 19_031_977
@@ -212,7 +210,7 @@ def gen_conditioned_rand_penta_matrix_dense(
            [-0.09784268,  0.2270634 , -0.1509019 ],
            [-0.23431267,  0.00468463,  0.22991003]])
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
     >>> np.linalg.cond(mat)
@@ -229,7 +227,7 @@ def gen_conditioned_rand_penta_matrix_dense(
            [-0.09784268,  0.2270634 , -0.1509019 ],
            [-0.23431267,  0.00468463, -0.02273771]])
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
@@ -253,7 +251,7 @@ def gen_conditioned_rand_penta_matrix_dense(
            [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
            [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.53]])
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
     >>> np.linalg.cond(mat)
@@ -274,7 +272,7 @@ def gen_conditioned_rand_penta_matrix_dense(
            [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
            [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.28]])
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
@@ -290,7 +288,7 @@ def gen_conditioned_rand_penta_matrix_dense(
     ...     ill_conditioned=False,
     ... )
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
     >>> np.linalg.cond(mat)
@@ -303,12 +301,12 @@ def gen_conditioned_rand_penta_matrix_dense(
     ...     ill_conditioned=True,
     ... )
     >>> # its bandwidth is computed and should be equal to 2
-    >>> bandwidth(mat)
+    >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
     >>> np.linalg.cond(mat)
-    5.058722571393928e+17
+    1.7137059583101745e+19
 
     """
 
@@ -326,9 +324,8 @@ def gen_conditioned_rand_penta_matrix_dense(
 
     # Case 2: ill-conditioned matrix
     else:
-        # here, the smallest diagonal entry is set to a value that is numerically zero
-        # compared to the largest entry
-        d_diag[n_rows - 1] = 0.1 * np.finfo(np.float64).eps * d_diag[0]
+        # here, the smallest diagonal entry is set to zero
+        d_diag[n_rows - 1] = 0.0
 
     # ... followed by a unit lower triangular matrix with 2 sub-diagonals, but here
     # the entries may be negative ...
@@ -363,6 +360,68 @@ def gen_conditioned_rand_penta_matrix_dense(
     return (l_mat.multiply(d_diag[np.newaxis, ::]).dot(u_mat)).toarray()
 
 
+def solve_penta_matrix_dense_scipy(
+    mat: np.ndarray,
+    rhs: np.ndarray,
+) -> np.ndarray:
+    """
+    Solves a pentadiagonal matrix system using SciPy's banded solver.
+
+    Doctests
+    --------
+    >>> # Setting up a small test matrix and right-hand side
+    >>> n_rows = 5
+    >>> seed = 19_031_977
+
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=False,
+    ... )
+    >>> rhs = np.random.rand(n_rows, 5)
+
+    >>> # Solving the system using SciPy's banded solver
+    >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs)
+    >>> np.round(sol, 2)
+    array([[-2.16, -0.36,  0.72,  0.23, -0.2 ],
+           [ 4.07,  1.3 ,  0.81,  1.31,  0.48],
+           [ 4.05,  0.33,  2.19,  1.22,  0.58],
+           [-1.9 , -0.79,  1.02, -0.39,  1.02],
+           [ 6.31,  1.81,  1.29,  1.41,  0.37]])
+
+    >>> # the solution is checked by verifying that the residual is close to zero
+    >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows
+    True
+
+    >>> # Setting up a large test matrix and right-hand side
+    >>> n_rows = 1_000
+
+    >>> mat = gen_conditioned_rand_penta_matrix_dense(
+    ...     n_rows=n_rows,
+    ...     seed=seed,
+    ...     ill_conditioned=False,
+    ... )
+    >>> rhs = np.random.rand(n_rows, 5)
+
+    >>> # Solving the system using SciPy's banded solver
+    >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs)
+    >>> # the solution is checked by verifying that the residual is close to zero
+    >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows
+    True
+
+    """
+
+    # first, the matrix is converted to LAPACK banded storage format
+    mat_banded = pp.create_banded(mat=mat, col_wise=True)
+
+    # then, the system is solved using SciPy's banded solver
+    return spla.solve_banded(
+        l_and_u=(2, 2),
+        ab=mat_banded,
+        b=rhs,
+    )
+
+
 # === Doctests ===
 
 if __name__ == "__main__":  # pragma: no cover

From a0c8849e05d96236e1a234af00095def29848efb Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 12:05:50 +0200
Subject: [PATCH 11/62] feat: [11] made error messages informative

---
 src/pentapy/tools.py | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py
index 3db7126..ca80d27 100644
--- a/src/pentapy/tools.py
+++ b/src/pentapy/tools.py
@@ -172,10 +172,14 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
     """
     mat = np.asanyarray(mat)
     if mat.ndim != 2:
-        msg = "create_banded: matrix has to be 2D"
+        msg = f"create_banded: matrix has to be 2D, got {mat.ndim}D"
         raise ValueError(msg)
+
     if mat.shape[0] != mat.shape[1]:
-        msg = "create_banded: matrix has to be n x n"
+        msg = (
+            f"create_banded: matrix has to be n x n, "
+            f"got {mat.shape[0]} x {mat.shape[1]}"
+        )
         raise ValueError(msg)
 
     size = mat.shape[0]
@@ -246,14 +250,23 @@ def create_full(mat, up=2, low=2, col_wise=True):
     """
     mat = np.asanyarray(mat)
     if mat.ndim != 2:
-        msg = "create_full: matrix has to be 2D"
+        msg = f"create_full: matrix has to be 2D, got {mat.ndim}D"
         raise ValueError(msg)
+
     if mat.shape[0] != up + low + 1:
-        msg = "create_full: matrix has wrong count of bands"
+        msg = (
+            f"create_full: matrix has wrong count of bands, required "
+            f"{up} + {low} + 1 = {up + low + 1}, got {mat.shape[0]} bands"
+        )
         raise ValueError(msg)
+
     if mat.shape[1] < max(up, low) + 1:
-        msg = "create_full: matrix has to few information"
+        msg = (
+            f"create_full: matrix has to few information, required "
+            f"{max(up, low) + 1} columns, got {mat.shape[1]} columns"
+        )
         raise ValueError(msg)
+
     size = mat.shape[1]
     mat_full = np.diag(mat[up])
     if col_wise:
@@ -266,16 +279,17 @@ def create_full(mat, up=2, low=2, col_wise=True):
             mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)]
         for i in range(low):
             mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :]
+
     return mat_full
 
 
 def _check_penta(mat):
     if mat.ndim != 2:
-        msg = "pentapy: matrix has to be 2D"
+        msg = f"pentapy: matrix has to be 2D, got {mat.ndim}D"
         raise ValueError(msg)
     if mat.shape[0] != 5:
-        msg = "pentapy: matrix needs 5 bands"
+        msg = f"pentapy: matrix needs 5 bands, got {mat.shape[0]} bands"
         raise ValueError(msg)
     if mat.shape[1] < 3:
-        msg = "pentapy: matrix needs at least 3 rows"
+        msg = f"pentapy: matrix needs at least 3 rows, got {mat.shape[1]} rows"
         raise ValueError(msg)

From ee6943a2fae9d3fb676bc56e20c90f41b2b4d29b Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 13:23:46 +0200
Subject: [PATCH 12/62] fix: [23] disabled cdivision to fix error handling on
 Python side

---
 src/pentapy/solver.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index c8a22a2..091288c 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=True
+# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=False
 
 """
 This is a solver linear equation systems with a penta-diagonal matrix,

From 028484372fcce5d445695cae86f9db884bbe4290 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 13:28:14 +0200
Subject: [PATCH 13/62] feat/refactor: [11] enabled multiple right-hand sides
 for solver I; improved import chain; improved code readability

---
 src/pentapy/core.py | 58 ++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 9a55780..2393122 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -5,8 +5,8 @@
 
 import numpy as np
 
+from pentapy import tools as ptools
 from pentapy.solver import penta_solver1, penta_solver2
-from pentapy.tools import _check_penta, create_banded, shift_banded
 
 
 def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
@@ -66,40 +66,60 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
     result : :class:`numpy.ndarray`
         Solution of the equation system
     """
+
     if solver in [1, "1", "PTRANS-I"]:
         if is_flat and index_row_wise:
             mat_flat = np.asarray(mat, dtype=np.double)
-            _check_penta(mat_flat)
+            ptools._check_penta(mat_flat)
         elif is_flat:
             mat_flat = np.array(mat, dtype=np.double)
-            _check_penta(mat_flat)
-            shift_banded(mat_flat, copy=False)
+            ptools._check_penta(mat_flat)
+            ptools.shift_banded(mat_flat, copy=False)
         else:
-            mat_flat = create_banded(mat, col_wise=False, dtype=np.double)
+            mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
+
         rhs = np.asarray(rhs, dtype=np.double)
+
+        # Special case: Early exit when the matrix has only 3 rows/columns
+        # NOTE: this avoids memory leakage in the Cython-solver that will iterate over
+        #       at least 4 rows/columns no matter what
+        if mat_flat.shape[1] == 3:
+            return np.linalg.solve(
+                a=ptools.create_full(mat_flat, col_wise=False),
+                b=rhs,
+            )
+
+        # if there is only a single right-hand side, it has to be reshaped to a 2D array
+        # NOTE: this has to be reverted at the end
         single_rhs = rhs.ndim == 1
+        rhs_og_shape = rhs.shape
         if single_rhs:
             rhs = rhs[:, np.newaxis]
 
         try:
+            # if there was only a 1D right-hand side, the result has to be flattened
             if single_rhs:
                 return penta_solver1(mat_flat, rhs).ravel()
 
             return penta_solver1(mat_flat, rhs)
+
         except ZeroDivisionError:
             warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
-            return np.full_like(rhs, np.nan)
+            return np.full(shape=rhs_og_shape, fill_value=np.nan)
+
     elif solver in [2, "2", "PTRANS-II"]:
         if is_flat and index_row_wise:
             mat_flat = np.asarray(mat, dtype=np.double)
-            _check_penta(mat_flat)
+            ptools._check_penta(mat_flat)
         elif is_flat:
             mat_flat = np.array(mat, dtype=np.double)
-            _check_penta(mat_flat)
-            shift_banded(mat_flat, copy=False)
+            ptools._check_penta(mat_flat)
+            ptools.shift_banded(mat_flat, copy=False)
         else:
-            mat_flat = create_banded(mat, col_wise=False, dtype=np.double)
+            mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
+
         rhs = np.asarray(rhs, dtype=np.double)
+
         try:
             return penta_solver2(mat_flat, rhs)
         except ZeroDivisionError:
@@ -113,12 +133,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
             raise ValueError(msg) from imp_err
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)
-            _check_penta(mat_flat)
-            shift_banded(mat_flat, col_to_row=False, copy=False)
+            ptools._check_penta(mat_flat)
+            ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
             mat_flat = np.asarray(mat)
         else:
-            mat_flat = create_banded(mat)
+            mat_flat = ptools.create_banded(mat)
         return solve_banded((2, 2), mat_flat, rhs)
     elif solver in [4, "4", "spsolve"]:  # pragma: no cover
         try:
@@ -129,12 +149,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
             raise ValueError(msg) from imp_err
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)
-            _check_penta(mat_flat)
-            shift_banded(mat_flat, col_to_row=False, copy=False)
+            ptools._check_penta(mat_flat)
+            ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
             mat_flat = np.asarray(mat)
         else:
-            mat_flat = create_banded(mat)
+            mat_flat = ptools.create_banded(mat)
         size = mat_flat.shape[1]
         M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
         return spsolve(M, rhs, use_umfpack=False)
@@ -153,12 +173,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
             raise ValueError(msg) from imp_err
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)
-            _check_penta(mat_flat)
-            shift_banded(mat_flat, col_to_row=False, copy=False)
+            ptools._check_penta(mat_flat)
+            ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
             mat_flat = np.asarray(mat)
         else:
-            mat_flat = create_banded(mat)
+            mat_flat = ptools.create_banded(mat)
         size = mat_flat.shape[1]
         M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
         return spsolve(M, rhs, use_umfpack=True)

From c32bec2744b08d455627197a56eca6927bfa1a0b Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 13:28:40 +0200
Subject: [PATCH 14/62] tests: [11] added shape check to doctest

---
 tests/util_funcs.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index 8ef8efa..be9f3c1 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -209,7 +209,9 @@ def gen_conditioned_rand_penta_matrix_dense(
     array([[ 0.92453713,  0.28308514, -0.09972199],
            [-0.09784268,  0.2270634 , -0.1509019 ],
            [-0.23431267,  0.00468463,  0.22991003]])
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
@@ -226,7 +228,9 @@ def gen_conditioned_rand_penta_matrix_dense(
     array([[ 0.92453713,  0.28308514, -0.09972199],
            [-0.09784268,  0.2270634 , -0.1509019 ],
            [-0.23431267,  0.00468463, -0.02273771]])
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
@@ -250,7 +254,9 @@ def gen_conditioned_rand_penta_matrix_dense(
            [ 0.  ,  0.  ,  0.43,  0.13,  0.39, -0.1 , -0.15],
            [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
            [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.53]])
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
@@ -271,7 +277,9 @@ def gen_conditioned_rand_penta_matrix_dense(
            [ 0.  ,  0.  ,  0.43,  0.13,  0.39, -0.1 , -0.15],
            [ 0.  ,  0.  ,  0.  ,  0.06, -0.14,  0.4 ,  0.28],
            [ 0.  ,  0.  ,  0.  ,  0.  , -0.14,  0.36,  0.28]])
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
@@ -287,7 +295,9 @@ def gen_conditioned_rand_penta_matrix_dense(
     ...     seed=seed,
     ...     ill_conditioned=False,
     ... )
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
@@ -300,7 +310,9 @@ def gen_conditioned_rand_penta_matrix_dense(
     ...     seed=seed,
     ...     ill_conditioned=True,
     ... )
-    >>> # its bandwidth is computed and should be equal to 2
+    >>> # it has to be square and its bandwidth is computed and should be equal to 2
+    >>> mat.shape[0] == mat.shape[1]
+    True
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and its value should be close to the

From e7d92c86e67ad2cc77a9e293e9d35c46f81ac624 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 13:29:57 +0200
Subject: [PATCH 15/62] tests: [11] added extensive parametrized tests for
 solver I that also cover the edge cases

---
 tests/test_solver_1.py | 146 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 tests/test_solver_1.py

diff --git a/tests/test_solver_1.py b/tests/test_solver_1.py
new file mode 100644
index 0000000..df00f9d
--- /dev/null
+++ b/tests/test_solver_1.py
@@ -0,0 +1,146 @@
+"""
+Test suite for testing the pentadiagonal solver based on Algorithm PTRANS-I.
+
+"""
+
+# === Imports ===
+
+from typing import Literal
+
+import numpy as np
+import pentapy as pp
+import pytest
+import util_funcs as uf
+
+# === Constants ===
+
+SEED = 19_031_977
+N_ROWS = [
+    3,
+    4,
+    5,
+    10,
+    11,
+    25,
+    26,
+    50,
+    51,
+    100,
+    101,
+    250,
+    251,
+    500,
+    501,
+    1_000,
+    1_001,
+    10_000,
+    10_001,
+]
+REF_WARNING = "pentapy: PTRANS-I not suitable for input-matrix."
+
+# === Tests ===
+
+
+@pytest.mark.parametrize("induce_error", [False, True])
+@pytest.mark.parametrize("solver_alias", [1])  # "1", "PTRANS-I"])
+@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"])
+@pytest.mark.parametrize("n_rhs", [None, 1, 10])
+@pytest.mark.parametrize("n_rows", N_ROWS)
+def test_penta_solver1(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[1, "1", "PTRANS-I"],
+    induce_error: bool,
+) -> None:
+    """
+    Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from
+    different input layouts, number of right-hand sides, number of rows, and also
+    when inducing an error by making the first diagonal element zero.
+    It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
+
+    """
+
+    # first, a random pentadiagonal matrix is generated
+    mat_full = uf.gen_conditioned_rand_penta_matrix_dense(
+        n_rows=n_rows,
+        seed=SEED,
+        ill_conditioned=False,
+    )
+
+    # an error is induced by setting the first diagonal element to zero
+    if induce_error:
+        # the induction of the error is only possible if the matrix does not have
+        # only 3 rows
+        if n_rows == 3:
+            pytest.skip(
+                "Only 3 rows, cannot induce error because this will not go into "
+                "PTRANS-I, but NumPy"
+            )
+
+        mat_full[0, 0] = 0.0
+
+    # the right-hand side is generated
+    np.random.seed(SEED)
+    if n_rhs is not None:
+        rhs = np.random.rand(n_rows, n_rhs)
+        result_shape = (n_rows, n_rhs)
+    else:
+        rhs = np.random.rand(n_rows)
+        result_shape = (n_rows,)
+
+    # the matrix is converted to the desired layout
+    if input_layout == "full":
+        mat = mat_full
+        kwargs = dict(is_flat=False)
+
+    elif input_layout == "banded_row_wise":
+        mat = pp.create_banded(mat_full, col_wise=False)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=True,
+        )
+
+    elif input_layout == "banded_col_wise":
+        mat = pp.create_banded(mat_full, col_wise=True)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=False,
+        )
+
+    else:
+        raise ValueError(f"Invalid input layout: {input_layout}")
+
+    # the solution is computed
+    # Case 1: in case of an error, a warning has to be issued and the result has to
+    # be NaN
+    if induce_error:
+        with pytest.warns(UserWarning, match=REF_WARNING):
+            sol = pp.solve(
+                mat=mat,
+                rhs=rhs,
+                solver=solver_alias,  # type: ignore
+                **kwargs,
+            )
+            assert sol.shape == result_shape
+            assert np.isnan(sol).all()
+
+        return
+
+    # Case 2: in case of no error, the solution can be computed without any issues
+    sol = pp.solve(
+        mat=mat,
+        rhs=rhs,
+        solver=solver_alias,  # type: ignore
+        **kwargs,
+    )
+    assert sol.shape == result_shape
+
+    # if no error was induced, the reference solution is computed with SciPy
+    sol_ref = uf.solve_penta_matrix_dense_scipy(
+        mat=mat_full,
+        rhs=rhs,
+    )
+
+    # the solutions are compared
+    assert np.allclose(sol, sol_ref)

From 647012f05677306e911ef0edcfa1162785efbafe Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 14:11:28 +0200
Subject: [PATCH 16/62] tests: [11] added more intermediate sizes for tests

---
 tests/test_solver_1.py | 4 ++++
 tests/test_tools.py    | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_solver_1.py b/tests/test_solver_1.py
index df00f9d..78956f6 100644
--- a/tests/test_solver_1.py
+++ b/tests/test_solver_1.py
@@ -33,6 +33,10 @@
     501,
     1_000,
     1_001,
+    2500,
+    2501,
+    5_000,
+    5_001,
     10_000,
     10_001,
 ]
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 2f54c48..cabac61 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -9,9 +9,10 @@
 from typing import Optional, Tuple, Type
 
 import numpy as np
-import pentapy as pp
 import pytest
 import util_funcs as uf
+
+import pentapy as pp
 from pentapy.tools import _check_penta
 
 warnings.simplefilter("always")
@@ -37,6 +38,10 @@
     501,
     1_000,
     1_001,
+    2500,
+    2501,
+    5_000,
+    5_001,
     10_000,
     10_001,
 ]

From 8ab514f9404c803b4f6f368a5c21f607729d4201 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 15:51:36 +0200
Subject: [PATCH 17/62] feat/fix: [11] added cython annotations to build
 process; fixed wrong f-string; fixed typo

---
 setup.py               | 9 ++++++---
 src/pentapy/.gitignore | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)
 create mode 100644 src/pentapy/.gitignore

diff --git a/setup.py b/setup.py
index 8081e05..fc8648c 100644
--- a/setup.py
+++ b/setup.py
@@ -1,15 +1,18 @@
-"""pentapy: A toolbox for pentadiagonal matrizes."""
+"""pentapy: A toolbox for pentadiagonal matrices."""
 
 import os
 
+import Cython.Compiler.Options
 import numpy as np
 from Cython.Build import cythonize
 from setuptools import Extension, setup
 
+Cython.Compiler.Options.annotate = True
+
 # cython extensions
 CY_MODULES = [
     Extension(
-        name=f"pentapy.solver",
+        name="pentapy.solver",
         sources=[os.path.join("src", "pentapy", "solver.pyx")],
         include_dirs=[np.get_include()],
         define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
@@ -17,7 +20,7 @@
 ]
 
 setup(
-    ext_modules=cythonize(CY_MODULES),
+    ext_modules=cythonize(CY_MODULES, nthreads=1, annotate=True),
     package_data={"pentapy": ["*.pxd"]},  # include pxd files
     include_package_data=False,  # ignore other files
     zip_safe=False,
diff --git a/src/pentapy/.gitignore b/src/pentapy/.gitignore
new file mode 100644
index 0000000..53cc0d6
--- /dev/null
+++ b/src/pentapy/.gitignore
@@ -0,0 +1,2 @@
+# Cython html files
+*.html
\ No newline at end of file

From 5086959a21a974f697aa3212ecfb22d8671248d4 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 15:52:50 +0200
Subject: [PATCH 18/62] refactor/doc: fixed missing variable declarations;
 added clarifying comments

---
 src/pentapy/solver.pyx | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 091288c..9d45c90 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -73,10 +73,11 @@ cdef void c_penta_factorize_algo1(
     """
     Factorizes the pentadiagonal matrix ``A`` into
 
-    - auxiliary coefficients ``e``, ``mu`` and ``gamma`` for the transformation of the
-        right-hand side
-    - a unit upper triangular matrix with the main diagonals ``alpha`` and ``beta``
-        for the following backward substitution. Its unit main diagonal is implicit.
+    - auxiliary coefficients ``e``, ``mu`` and ``gamma`` (``ga``) for the transformation
+        of the right-hand side
+    - a unit upper triangular matrix with the super-diagonals ``alpha`` (``al``) and
+        ``beta`` (``be``) for the following backward substitution. Its unit main
+        diagonal is implicit.
 
     They are overwriting the memoryview ``mat_factorized`` as follows:
 
@@ -99,8 +100,9 @@ cdef void c_penta_factorize_algo1(
     # === Variable declarations ===
 
     cdef uint64_t iter_row
-    cdef double mu_i, ga_i, e_i
-    cdef double al_i, al_i_minus_1, al_i_plus_1
+    cdef double mu_i, ga_i, e_i # mu, gamma, e
+    cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha
+    cdef double be_i, be_i_minus_1, be_i_plus_1 # beta
 
     # === Factorization ===
 

From f062b886c3b8218460f651fc742f75a6df305744 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 17:26:08 +0200
Subject: [PATCH 19/62] style: [11] made - signs better readable

---
 src/pentapy/solver.pyx | 64 +++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 9d45c90..ea15048 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -130,7 +130,7 @@ cdef void c_penta_factorize_algo1(
     mat_factorized[1, 4] = be_i
 
     # Central rows
-    for iter_row in range(2, mat_n_rows-2):
+    for iter_row in range(2, mat_n_rows - 2):
         e_i = mat_flat[4, iter_row]
         ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i
         mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i
@@ -150,27 +150,27 @@ cdef void c_penta_factorize_algo1(
         mat_factorized[iter_row, 4] = be_i
 
     # Second to last row
-    e_i = mat_flat[4, mat_n_rows-2]
-    ga_i = mat_flat[3, mat_n_rows-2] - al_i_minus_1 * e_i
-    mu_i = mat_flat[2, mat_n_rows-2] - be_i_minus_1 * e_i - al_i * ga_i
-    al_i_plus_1 = (mat_flat[1, mat_n_rows-2] - be_i * ga_i) / mu_i
+    e_i = mat_flat[4, mat_n_rows - 2]
+    ga_i = mat_flat[3, mat_n_rows - 2] - al_i_minus_1 * e_i
+    mu_i = mat_flat[2, mat_n_rows - 2] - be_i_minus_1 * e_i - al_i * ga_i
+    al_i_plus_1 = (mat_flat[1, mat_n_rows - 2] - be_i * ga_i) / mu_i
 
-    mat_factorized[mat_n_rows-2, 0] = e_i
-    mat_factorized[mat_n_rows-2, 1] = mu_i
-    mat_factorized[mat_n_rows-2, 2] = ga_i
-    mat_factorized[mat_n_rows-2, 3] = al_i_plus_1
-    mat_factorized[mat_n_rows-2, 4] = 0.0
+    mat_factorized[mat_n_rows - 2, 0] = e_i
+    mat_factorized[mat_n_rows - 2, 1] = mu_i
+    mat_factorized[mat_n_rows - 2, 2] = ga_i
+    mat_factorized[mat_n_rows - 2, 3] = al_i_plus_1
+    mat_factorized[mat_n_rows - 2, 4] = 0.0
 
     # Last Row
-    e_i = mat_flat[4, mat_n_rows-1]
-    ga_i = mat_flat[3, mat_n_rows-1] - al_i * e_i
-    mu_i = mat_flat[2, mat_n_rows-1] - be_i * e_i - al_i_plus_1 * ga_i
+    e_i = mat_flat[4, mat_n_rows - 1]
+    ga_i = mat_flat[3, mat_n_rows - 1] - al_i * e_i
+    mu_i = mat_flat[2, mat_n_rows - 1] - be_i * e_i - al_i_plus_1 * ga_i
 
-    mat_factorized[mat_n_rows-1, 0] = e_i
-    mat_factorized[mat_n_rows-1, 1] = mu_i
-    mat_factorized[mat_n_rows-1, 2] = ga_i
-    mat_factorized[mat_n_rows-1, 3] = 0.0
-    mat_factorized[mat_n_rows-1, 4] = 0.0
+    mat_factorized[mat_n_rows - 1, 0] = e_i
+    mat_factorized[mat_n_rows - 1, 1] = mu_i
+    mat_factorized[mat_n_rows - 1, 2] = ga_i
+    mat_factorized[mat_n_rows - 1, 3] = 0.0
+    mat_factorized[mat_n_rows - 1, 4] = 0.0
 
     return
 
@@ -207,7 +207,7 @@ cdef void c_solve_penta_from_factorize_algo_1(
     result_view[1] = ze_i
 
     # Central rows
-    for iter_row in range(2, mat_n_rows-2):
+    for iter_row in range(2, mat_n_rows - 2):
         ze_i_plus_1 = (
             rhs_single[iter_row]
             - ze_i_minus_1 * mat_factorized[iter_row, 0]
@@ -219,30 +219,30 @@ cdef void c_solve_penta_from_factorize_algo_1(
 
     # Second to last row
     ze_i_plus_1 = (
-        rhs_single[mat_n_rows-2]
-        - ze_i_minus_1 * mat_factorized[mat_n_rows-2, 0]
-        - ze_i * mat_factorized[mat_n_rows-2, 2]
-    ) / mat_factorized[mat_n_rows-2, 1]
+        rhs_single[mat_n_rows - 2]
+        - ze_i_minus_1 * mat_factorized[mat_n_rows - 2, 0]
+        - ze_i * mat_factorized[mat_n_rows - 2, 2]
+    ) / mat_factorized[mat_n_rows - 2, 1]
     ze_i_minus_1 = ze_i
     ze_i = ze_i_plus_1
-    result_view[mat_n_rows-2] = ze_i_plus_1
+    result_view[mat_n_rows - 2] = ze_i_plus_1
 
     # Last row
     ze_i_plus_1 = (
-        rhs_single[mat_n_rows-1]
-        - ze_i_minus_1 * mat_factorized[mat_n_rows-1, 0]
-        - ze_i * mat_factorized[mat_n_rows-1, 2]
-    ) / mat_factorized[mat_n_rows-1, 1]
-    result_view[mat_n_rows-1] = ze_i_plus_1
+        rhs_single[mat_n_rows - 1]
+        - ze_i_minus_1 * mat_factorized[mat_n_rows - 1, 0]
+        - ze_i * mat_factorized[mat_n_rows - 1, 2]
+    ) / mat_factorized[mat_n_rows - 1, 1]
+    result_view[mat_n_rows - 1] = ze_i_plus_1
 
     # === Backward substitution ===
 
     # The solution vector is calculated by backward substitution that overwrites the
     # right-hand side vector with the solution vector
-    ze_i -= mat_factorized[mat_n_rows-2, 3] * ze_i_plus_1
-    result_view[mat_n_rows-2] = ze_i
+    ze_i -= mat_factorized[mat_n_rows - 2, 3] * ze_i_plus_1
+    result_view[mat_n_rows - 2] = ze_i
 
-    for iter_row in range(mat_n_rows-3, -1, -1):
+    for iter_row in range(mat_n_rows - 3, -1, -1):
         result_view[iter_row] -= (
             mat_factorized[iter_row, 3] * ze_i
             + mat_factorized[iter_row, 4] * ze_i_plus_1

From 15c787e19c2396f18eb16d4a7717d5739b516632 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 18:56:44 +0200
Subject: [PATCH 20/62] doc/refactor: [11] improved docs and comments of
 algorithm I; removed `uint`

---
 src/pentapy/solver.pyx | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index ea15048..451eee9 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -11,7 +11,7 @@ implemented in Cython.
 import numpy as np
 
 cimport numpy as np
-from libc.stdint cimport int64_t, uint64_t
+from libc.stdint cimport int64_t
 
 
 # === Main Python Interface ===
@@ -40,12 +40,16 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
 
     """
 
-    cdef uint64_t mat_n_rows = mat_flat.shape[1]
-    cdef uint64_t rhs_n_cols = rhs.shape[1]
-    cdef uint64_t iter_col
+    # === Variable declarations ===
+
+    cdef int64_t mat_n_rows = mat_flat.shape[1]
+    cdef int64_t rhs_n_cols = rhs.shape[1]
+    cdef int64_t iter_col
     cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols))
     cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5))
 
+    # === Solving the system of equations ===
+
     # first, the matrix is factorized
     c_penta_factorize_algo1(
         mat_flat,
@@ -67,7 +71,7 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
 
 cdef void c_penta_factorize_algo1(
     double[:, :] mat_flat,
-    uint64_t mat_n_rows,
+    int64_t mat_n_rows,
     double[::, ::1] mat_factorized,
 ):
     """
@@ -82,14 +86,14 @@ cdef void c_penta_factorize_algo1(
     They are overwriting the memoryview ``mat_factorized`` as follows:
 
     ```bash
-    [[  *           mu_0        *           al_0        be_0  ]
-     [  *           mu_1        ga_1        al_1        be_1  ]
-     [  e_2         mu_2        ga_2        al_2        be_2  ]
+    [[   *          mu_0         *          al_0        be_0      ]
+     [   *          mu_1        ga_1        al_1        be_1      ]
+     [  e_2         mu_2        ga_2        al_2        be_2      ]
                                 ...
      [  e_i         mu_i        ga_i        al_i        be_i  ]
-                                ...
-     [  e_{n-2}     mu_{n-2}    ga_{n-2}    al_{n-2}    *     ]
-     [  e_{n-1}     mu_{n-1}    ga_{n-1}    *           *     ]]
+     [  e_{n-3}     mu_{n-3}    ga_{n-3}    al_{n-3}    be_{n-3}  ]                                ...
+     [  e_{n-2}     mu_{n-2}    ga_{n-2}    al_{n-2}      *       ]
+     [  e_{n-1}     mu_{n-1}    ga_{n-1}      *           *       ]]
     ```
 
     where the entries marked with ``*`` are not used by design, but overwritten with
@@ -99,7 +103,7 @@ cdef void c_penta_factorize_algo1(
 
     # === Variable declarations ===
 
-    cdef uint64_t iter_row
+    cdef int64_t iter_row
     cdef double mu_i, ga_i, e_i # mu, gamma, e
     cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha
     cdef double be_i, be_i_minus_1, be_i_plus_1 # beta
@@ -176,7 +180,7 @@ cdef void c_penta_factorize_algo1(
 
 
 cdef void c_solve_penta_from_factorize_algo_1(
-    uint64_t mat_n_rows,
+    int64_t mat_n_rows,
     double[::, ::1] mat_factorized,
     double[::] rhs_single,
     double[::] result_view,

From ef7ec51a6eceda76a54c39b83b912baeafe2f4bf Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 19:45:01 +0200
Subject: [PATCH 21/62] wip: [11] restructured chaotic alias comparisons

---
 src/pentapy/_models.py |  49 +++++++++++++++++
 src/pentapy/core.py    | 122 ++++++++++++++++++++++++-----------------
 2 files changed, 121 insertions(+), 50 deletions(-)
 create mode 100644 src/pentapy/_models.py

diff --git a/src/pentapy/_models.py b/src/pentapy/_models.py
new file mode 100644
index 0000000..c75eb8c
--- /dev/null
+++ b/src/pentapy/_models.py
@@ -0,0 +1,49 @@
+"""
+Auxiliary models for the pentapy package.
+
+"""
+
+# === Imports ===
+
+from enum import IntEnum
+from typing import Dict
+
+# === Models ===
+
+
+class PentaSolverAliases(IntEnum):
+    """
+    Defines all available solver aliases for pentadiagonal systems, namely
+
+    - ``PTRANS_I``: The PTRANS-I algorithm
+    - ``PTRANS_II``: The PTRANS-II algorithm
+    - ``LAPACK``: SciPy's LAPACK solver :func:`scipy.linalg.solve_banded`
+    - ``SUPER_LU``: SciPy's SuperLU solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
+    - ``UMFPACK``: SciPy's UMFPACK solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)`
+
+    """  # noqa: E501
+
+    PTRANS_I = 1
+    PTRANS_II = 2
+    LAPACK = 3
+    SUPER_LU = 4
+    UMFPACK = 5
+
+
+# === Constants ===
+
+_SOLVER_ALIAS_CONVERSIONS: Dict[str, PentaSolverAliases] = {
+    "1": PentaSolverAliases.PTRANS_I,
+    "ptrans-i": PentaSolverAliases.PTRANS_I,
+    "2": PentaSolverAliases.PTRANS_II,
+    "ptrans-ii": PentaSolverAliases.PTRANS_II,
+    "3": PentaSolverAliases.LAPACK,
+    "lapack": PentaSolverAliases.LAPACK,
+    "solve_banded": PentaSolverAliases.LAPACK,
+    "4": PentaSolverAliases.SUPER_LU,
+    "spsolve": PentaSolverAliases.SUPER_LU,
+    "5": PentaSolverAliases.UMFPACK,
+    "spsolve_umf": PentaSolverAliases.UMFPACK,
+    "umf": PentaSolverAliases.UMFPACK,
+    "umf_pack": PentaSolverAliases.UMFPACK,
+}
diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 2393122..ccf0c8f 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -1,15 +1,48 @@
 """The core module of pentapy."""
 
 # pylint: disable=C0103, C0415, R0911, E0611
+
+# === Imports ===
+
 import warnings
+from typing import Literal
 
 import numpy as np
 
+from pentapy import _models as pmodels
+from pentapy import solver as psolver  # type: ignore
 from pentapy import tools as ptools
-from pentapy.solver import penta_solver1, penta_solver2
-
 
-def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
+# === Solver ===
+
+
+def solve(
+    mat: np.ndarray,
+    rhs: np.ndarray,
+    is_flat: bool = False,
+    index_row_wise: bool = True,
+    solver: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "ptrans-i",
+        2,
+        "2",
+        "PTRANS-II",
+        "ptrans-ii",
+        3,
+        "3",
+        "lapack",
+        4,
+        "4",
+        "spsolve",
+        5,
+        "5",
+        "spsolve_umf",
+        "umf",
+        "umf_pack",
+    ] = 1,
+) -> np.ndarray:
     """
     Solver for a pentadiagonal system.
 
@@ -39,35 +72,39 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
 
     Parameters
     ----------
-    mat : :class:`numpy.ndarray`
-        The Matrix or the flattened Version of the pentadiagonal matrix.
-    rhs : :class:`numpy.ndarray`
-        The right hand side of the equation system.
-    is_flat : :class:`bool`, optional
+    mat : :class:`numpy.ndarray` of shape (m, m) or (5, m)
+        The full or flattened version of the pentadiagonal matrix.
+    rhs : :class:`numpy.ndarray` of shape (m,) or (m, n)
+        The right hand side(s) of the equation system. Its shape is preserved.
+    is_flat : :class:`bool`, default=False
         State if the matrix is already flattend. Default: ``False``
-    index_row_wise : :class:`bool`, optional
+    index_row_wise : :class:`bool`, default=True
         State if the flattend matrix is row-wise flattend. Default: ``True``
-    solver : :class:`int` or :class:`str`, optional
+    solver : :class:`int` or :class:`str`, default=1
         Which solver should be used. The following are provided:
 
-            * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm
+            * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm (default)
             * ``[2, "2", "PTRANS-II"]`` : The PTRANS-II algorithm
-            * ``[3, "3", "lapack", "solve_banded"]`` :
-              scipy.linalg.solve_banded
-            * ``[4, "4", "spsolve"]`` :
-              The scipy sparse solver without umf_pack
-            * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` :
-              The scipy sparse solver with umf_pack
+            * ``[3, "3", "lapack", "solve_banded"]`` : :func:`scipy.linalg.solve_banded`
+            * ``[4, "4", "spsolve"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
+            * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)`
 
-        Default: ``1``
+        Strings are not case-sensitive.
 
     Returns
     -------
-    result : :class:`numpy.ndarray`
-        Solution of the equation system
+    result : :class:`numpy.ndarray` of shape (m,) or (m, n)
+        Solution of the equation system with the same shape as ``rhs``.
+
     """
 
-    if solver in [1, "1", "PTRANS-I"]:
+    # first, the solver is converted to the internal name to avoid confusion
+    solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()]
+
+    if solver_inter in {
+        pmodels.PentaSolverAliases.PTRANS_I,
+        pmodels.PentaSolverAliases.PTRANS_II,
+    }:
         if is_flat and index_row_wise:
             mat_flat = np.asarray(mat, dtype=np.double)
             ptools._check_penta(mat_flat)
@@ -97,35 +134,23 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
             rhs = rhs[:, np.newaxis]
 
         try:
+            solver_func = (
+                psolver.penta_solver1
+                if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
+                else psolver.penta_solver2
+            )
+
             # if there was only a 1D right-hand side, the result has to be flattened
             if single_rhs:
-                return penta_solver1(mat_flat, rhs).ravel()
+                return solver_func(mat_flat, rhs).ravel()
 
-            return penta_solver1(mat_flat, rhs)
+            return solver_func(mat_flat, rhs)
 
         except ZeroDivisionError:
             warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
             return np.full(shape=rhs_og_shape, fill_value=np.nan)
 
-    elif solver in [2, "2", "PTRANS-II"]:
-        if is_flat and index_row_wise:
-            mat_flat = np.asarray(mat, dtype=np.double)
-            ptools._check_penta(mat_flat)
-        elif is_flat:
-            mat_flat = np.array(mat, dtype=np.double)
-            ptools._check_penta(mat_flat)
-            ptools.shift_banded(mat_flat, copy=False)
-        else:
-            mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
-
-        rhs = np.asarray(rhs, dtype=np.double)
-
-        try:
-            return penta_solver2(mat_flat, rhs)
-        except ZeroDivisionError:
-            warnings.warn("pentapy: PTRANS-II not suitable for input-matrix.")
-            return np.full_like(rhs, np.nan)
-    elif solver in [3, "3", "lapack", "solve_banded"]:  # pragma: no cover
+    elif solver_inter == pmodels.PentaSolverAliases.LAPACK:  # pragma: no cover
         try:
             from scipy.linalg import solve_banded
         except ImportError as imp_err:  # pragma: no cover
@@ -140,7 +165,8 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
         else:
             mat_flat = ptools.create_banded(mat)
         return solve_banded((2, 2), mat_flat, rhs)
-    elif solver in [4, "4", "spsolve"]:  # pragma: no cover
+
+    elif solver_inter == pmodels.PentaSolverAliases.SUPER_LU:  # pragma: no cover
         try:
             from scipy import sparse as sps
             from scipy.sparse.linalg import spsolve
@@ -158,13 +184,8 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
         size = mat_flat.shape[1]
         M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
         return spsolve(M, rhs, use_umfpack=False)
-    elif solver in [
-        5,
-        "5",
-        "spsolve_umf",
-        "umf",
-        "umf_pack",
-    ]:  # pragma: no cover
+
+    elif solver_inter == pmodels.PentaSolverAliases.UMFPACK:  # pragma: no cover
         try:
             from scipy import sparse as sps
             from scipy.sparse.linalg import spsolve
@@ -182,6 +203,7 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1):
         size = mat_flat.shape[1]
         M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
         return spsolve(M, rhs, use_umfpack=True)
+
     else:  # pragma: no cover
         msg = f"pentapy.solve: unknown solver ({solver})"
         raise ValueError(msg)

From 94d110ce03f676fe9ba6643d1f5ef69788cf715b Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 20:15:26 +0200
Subject: [PATCH 22/62] feat: [11] finalized multiple right-hand side support
 (serial) on Cython level; fixed typos

---
 src/pentapy/solver.pxd |   4 +-
 src/pentapy/solver.pyx | 262 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 222 insertions(+), 44 deletions(-)

diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index 05d249f..b16f8a0 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -1,4 +1,4 @@
 # cython: language_level=3
-cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs)
+cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs)
 
-cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs)
+cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs)
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 451eee9..1494a25 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=False
+# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=False
 
 """
 This is a solver linear equation systems with a penta-diagonal matrix,
@@ -17,18 +17,18 @@ from libc.stdint cimport int64_t
 # === Main Python Interface ===
 
 
-def penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
+def penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs):
     return np.asarray(c_penta_solver1(mat_flat, rhs))
 
 
-def penta_solver2(double[:, :] mat_flat, double[:] rhs):
+def penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs):
     return np.asarray(c_penta_solver2(mat_flat, rhs))
 
 
 # === Solver Algorithm 1 ===
 
 
-cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
+cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs):
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
     the right-hand side ``b`` by
@@ -70,7 +70,7 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs):
 
 
 cdef void c_penta_factorize_algo1(
-    double[:, :] mat_flat,
+    double[::, ::] mat_flat,
     int64_t mat_n_rows,
     double[::, ::1] mat_factorized,
 ):
@@ -260,53 +260,231 @@ cdef void c_solve_penta_from_factorize_algo_1(
 # === Solver Algorithm 2 ===
 
 
-cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs):
+cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs):
+    """
+    Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
+    the right-hand side ``b`` by
+
+    - factorizing the matrix ``A`` into auxiliary coefficients and a unit lower
+        triangular matrix ``L``
+    - transforming the right-hand side into a vector ``omega``
+    - solving the system of equations ``Lx = omega`` by forward substitution
+
+    """
+
+    # === Variable declarations ===
+
+    cdef int64_t mat_n_rows = mat_flat.shape[1]
+    cdef int64_t rhs_n_cols = rhs.shape[1]
+    cdef int64_t iter_col
+    cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols))
+    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5))
+
+    # first, the matrix is factorized
+    c_penta_factorize_algo2(
+        mat_flat,
+        mat_n_rows,
+        mat_factorized,
+    )
 
-    cdef int mat_j = mat_flat.shape[1]
+    # then, all the right-hand sides are solved
+    for iter_col in range(rhs_n_cols):
+        c_solve_penta_from_factorize_algo_2(
+            mat_n_rows,
+            mat_factorized,
+            rhs[::, iter_col],
+            result[::, iter_col],
+        )
 
-    cdef double[:] result = np.zeros(mat_j)
+    return result
 
-    cdef double[:] ps = np.zeros(mat_j)  # psi
-    cdef double[:] si = np.zeros(mat_j)  # sigma
-    cdef double[:] ph = np.zeros(mat_j)  # phi
-    cdef double[:] ro = np.zeros(mat_j)  # rho
-    cdef double[:] we = np.zeros(mat_j)  # w
+cdef void c_penta_factorize_algo2(
+    double[::, ::] mat_flat,
+    int64_t mat_n_rows,
+    double[::, ::1] mat_factorized,
+):
+    """
+    Factorizes the pentadiagonal matrix ``A`` into
 
-    cdef int i
+    - auxiliary coefficients ``psi`` (``ps``), ``rho`` and ``b`` for the transformation
+        of the right-hand side
+    - a unit lower triangular matrix with the sub-diagonals ``phi`` and ``sigma``
+       (``si``) for the following forward substitution. Its unit main diagonal is
+       implicit.
 
-    ps[mat_j-1] = mat_flat[2, mat_j-1]
-    si[mat_j-1] = mat_flat[3, mat_j-1] / ps[mat_j-1]
-    ph[mat_j-1] = mat_flat[4, mat_j-1] / ps[mat_j-1]
-    we[mat_j-1] = rhs[mat_j-1] / ps[mat_j-1]
+    They are overwriting the memoryview ``mat_factorized`` as follows:
 
-    ro[mat_j-2] = mat_flat[1, mat_j-2]
-    ps[mat_j-2] = mat_flat[2, mat_j-2] - si[mat_j-1] * ro[mat_j-2]
-    si[mat_j-2] = (mat_flat[3, mat_j-2] - ph[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2]
-    ph[mat_j-2] = mat_flat[4, mat_j-2] / ps[mat_j-2]
-    we[mat_j-2] = (rhs[mat_j-2] - we[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2]
+    ```bash
+    [[    *           *         ps_0        rho_0       b_0      ]
+     [    *         si_1        ps_1        rho_1       b_1      ]
+     [  phi_2       si_2        ps_2        rho_2       b_2      ]
+                                ...
+     [  phi_i       si_i        ps_i        rho_i       b_i      ]
+                                ...
+     [  phi_{n-3}   si_{n-3}    ps_{n-3}    rho_{n-3}   b_{n-3}  ]
+     [  phi_{n-2}   si_{n-2}    ps_{n-2}    rho_{n-2}     *      ]
+     [  phi_{n-1}   si_{n-1}    ps_{n-1}      *           *      ]]
+    ```
 
-    for i in range(mat_j-3, 1, -1):
-        ro[i] = mat_flat[1, i] - si[i+2] * mat_flat[0, i]
-        ps[i] = mat_flat[2, i] - ph[i+2] * mat_flat[0, i] - si[i+1] * ro[i]
-        si[i] = (mat_flat[3, i] - ph[i+1] * ro[i]) / ps[i]
-        ph[i] = mat_flat[4, i] / ps[i]
-        we[i] = (rhs[i] - we[i+2] * mat_flat[0, i] - we[i+1] * ro[i]) / ps[i]
+    where the entries marked with ``*`` are not used by design, but overwritten with
+    zeros.
 
-    ro[1] = mat_flat[1, 1] - si[3] * mat_flat[0, 1]
-    ps[1] = mat_flat[2, 1] - ph[3] * mat_flat[0, 1] - si[2] * ro[1]
-    si[1] = (mat_flat[3, 1] - ph[2] * ro[1]) / ps[1]
+    """
 
-    ro[0] = mat_flat[1, 0] - si[2] * mat_flat[0, 0]
-    ps[0] = mat_flat[2, 0] - ph[2] * mat_flat[0, 0] - si[1] * ro[0]
+    # === Variable declarations ===
 
-    we[1] = (rhs[1] - we[3] * mat_flat[0, 1] - we[2] * ro[1]) / ps[1]
-    we[0] = (rhs[0] - we[2] * mat_flat[0, 0] - we[1] * ro[0]) / ps[0]
+    cdef int64_t iter_row
+    cdef double b_i, ps_i, rho_i  # b, psi, rho (all typed to stay in C arithmetic)
+    cdef double si_i, si_i_minus_1, si_i_plus_1  # sigma
+    cdef double phi_i, phi_i_minus_1, phi_i_plus_1  # phi
 
-    # Foreward substitution
-    result[0] = we[0]
-    result[1] = we[1] - si[1] * result[0]
+    # === Factorization ===
 
-    for i in range(2, mat_j):
-        result[i] = we[i] - si[i] * result[i-1] - ph[i] * result[i-2]
+    # Last row (the factorization proceeds from the bottom row upwards)
+    ps_i = mat_flat[2, mat_n_rows - 1]
+    si_i_plus_1 = mat_flat[3, mat_n_rows - 1] / ps_i
+    phi_i_plus_1 = mat_flat[4, mat_n_rows - 1] / ps_i
 
-    return result
+    mat_factorized[mat_n_rows - 1, 0] = phi_i_plus_1
+    mat_factorized[mat_n_rows - 1, 1] = si_i_plus_1
+    mat_factorized[mat_n_rows - 1, 2] = ps_i
+    mat_factorized[mat_n_rows - 1, 3] = 0.0
+    mat_factorized[mat_n_rows - 1, 4] = 0.0
+
+    # Second to last row
+    rho_i = mat_flat[1, mat_n_rows - 2]
+    ps_i = mat_flat[2, mat_n_rows - 2] - si_i_plus_1 * rho_i
+    si_i = (mat_flat[3, mat_n_rows - 2] - phi_i_plus_1 * rho_i) / ps_i
+    phi_i = mat_flat[4, mat_n_rows - 2] / ps_i
+
+    mat_factorized[mat_n_rows - 2, 0] = phi_i
+    mat_factorized[mat_n_rows - 2, 1] = si_i
+    mat_factorized[mat_n_rows - 2, 2] = ps_i
+    mat_factorized[mat_n_rows - 2, 3] = rho_i
+    mat_factorized[mat_n_rows - 2, 4] = 0.0
+
+    # Central rows (``*_plus_1`` holds the value of row i + 2, plain ``*_i`` of row i + 1)
+    for iter_row in range(mat_n_rows - 3, 1, -1):
+        b_i = mat_flat[0, iter_row]
+        rho_i = mat_flat[1, iter_row] - si_i_plus_1 * b_i
+        ps_i = mat_flat[2, iter_row] - phi_i_plus_1 * b_i - si_i * rho_i
+        si_i_minus_1 = (mat_flat[3, iter_row] - phi_i * rho_i) / ps_i
+        si_i_plus_1 = si_i
+        si_i = si_i_minus_1
+        phi_i_minus_1 = mat_flat[4, iter_row] / ps_i
+        phi_i_plus_1 = phi_i
+        phi_i = phi_i_minus_1
+
+        mat_factorized[iter_row, 0] = phi_i
+        mat_factorized[iter_row, 1] = si_i
+        mat_factorized[iter_row, 2] = ps_i
+        mat_factorized[iter_row, 3] = rho_i
+        mat_factorized[iter_row, 4] = b_i
+
+    # Second row (index 1); no ``phi`` is computed here, so ``phi_i`` is not shifted
+    b_i = mat_flat[0, 1]
+    rho_i = mat_flat[1, 1] - si_i_plus_1 * b_i
+    ps_i = mat_flat[2, 1] - phi_i_plus_1 * b_i - si_i * rho_i
+    si_i_minus_1 = (mat_flat[3, 1] - phi_i * rho_i) / ps_i
+    si_i_plus_1 = si_i
+    si_i = si_i_minus_1
+
+    mat_factorized[1, 0] = 0.0
+    mat_factorized[1, 1] = si_i
+    mat_factorized[1, 2] = ps_i
+    mat_factorized[1, 3] = rho_i
+    mat_factorized[1, 4] = b_i
+
+    # First row (index 0); ``phi_i`` still holds the value of row 2
+    b_i = mat_flat[0, 0]
+    rho_i = mat_flat[1, 0] - si_i_plus_1 * b_i
+    ps_i = mat_flat[2, 0] - phi_i * b_i - si_i * rho_i
+
+    mat_factorized[0, 0] = 0.0
+    mat_factorized[0, 1] = 0.0
+    mat_factorized[0, 2] = ps_i
+    mat_factorized[0, 3] = rho_i
+    mat_factorized[0, 4] = b_i
+
+    return
+
+
+cdef void c_solve_penta_from_factorize_algo_2(
+    int64_t mat_n_rows,
+    double[::, ::1] mat_factorized,
+    double[::] rhs_single,
+    double[::] result_view,
+):
+    """
+    Solves the pentadiagonal system of equations ``Ax = b`` with the factorized
+    unit lower triangular matrix ``L`` and the right-hand side ``rhs_single``.
+    ``result_view`` is first filled with the transformed vector ``omega`` and then
+    overwritten with the solution ``x`` of ``Lx = omega``; ``rhs_single`` is only read.
+
+    """
+
+    # === Variable declarations ===
+
+    cdef int64_t iter_row
+    cdef double om_i, om_i_minus_1, om_i_plus_1  # omega (all typed to stay in C)
+
+    # === Transformation ===
+
+    # first, the right-hand side is transformed into the vector ``omega``
+    # Last row (the transformation proceeds from the bottom row upwards)
+    om_i_plus_1 = rhs_single[mat_n_rows - 1] / mat_factorized[mat_n_rows - 1, 2]
+    result_view[mat_n_rows - 1] = om_i_plus_1
+
+    # Second to last row
+    om_i = (
+        rhs_single[mat_n_rows - 2]
+        - om_i_plus_1 * mat_factorized[mat_n_rows - 2, 3]
+    ) / mat_factorized[mat_n_rows - 2, 2]
+    result_view[mat_n_rows - 2] = om_i
+
+    # Central rows (``om_i_plus_1`` holds row i + 2, ``om_i`` holds row i + 1)
+    for iter_row in range(mat_n_rows - 3, 1, -1):
+        om_i_minus_1 = (
+            rhs_single[iter_row]
+            - om_i_plus_1 * mat_factorized[iter_row, 4]
+            - om_i * mat_factorized[iter_row, 3]
+        ) / mat_factorized[iter_row, 2]
+        om_i_plus_1 = om_i
+        om_i = om_i_minus_1
+        result_view[iter_row] = om_i
+
+    # Second row (index 1)
+    om_i_minus_1 = (
+        rhs_single[1]
+        - om_i_plus_1 * mat_factorized[1, 4]
+        - om_i * mat_factorized[1, 3]
+    ) / mat_factorized[1, 2]
+    om_i_plus_1 = om_i
+    om_i = om_i_minus_1
+    result_view[1] = om_i
+
+    # First row (index 0); afterwards ``om_i_minus_1`` is row 0 and ``om_i`` is row 1
+    om_i_minus_1 = (
+        rhs_single[0]
+        - om_i_plus_1 * mat_factorized[0, 4]
+        - om_i * mat_factorized[0, 3]
+    ) / mat_factorized[0, 2]
+    result_view[0] = om_i_minus_1
+
+    # === Forward substitution ===
+
+    # The solution vector is calculated by forward substitution that overwrites the
+    # transformed vector ``omega`` stored in ``result_view`` with the solution vector
+    om_i -= mat_factorized[1, 1] * om_i_minus_1
+    result_view[1] = om_i
+
+    for iter_row in range(2, mat_n_rows):
+        result_view[iter_row] = (
+            result_view[iter_row]
+            - mat_factorized[iter_row, 0] * om_i_minus_1
+            - mat_factorized[iter_row, 1] * om_i
+        )
+        om_i_minus_1 = om_i
+        om_i = result_view[iter_row]
+
+    return
\ No newline at end of file

From ba5945f3e1b8ba10862d6c4e7302aa3e0f4dec00 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 20:16:15 +0200
Subject: [PATCH 23/62] tests: [11] unified test for both pentapy algorithms,
 made tests a lot more extensive

---
 ...t_solver_1.py => test_solvers_internal.py} | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)
 rename tests/{test_solver_1.py => test_solvers_internal.py} (82%)

diff --git a/tests/test_solver_1.py b/tests/test_solvers_internal.py
similarity index 82%
rename from tests/test_solver_1.py
rename to tests/test_solvers_internal.py
index 78956f6..2b543cf 100644
--- a/tests/test_solver_1.py
+++ b/tests/test_solvers_internal.py
@@ -1,5 +1,6 @@
 """
-Test suite for testing the pentadiagonal solver based on Algorithm PTRANS-I.
+Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or
+PTRANS-II.
 
 """
 
@@ -40,13 +41,17 @@
     10_000,
     10_001,
 ]
-REF_WARNING = "pentapy: PTRANS-I not suitable for input-matrix."
+REF_WARNING_CONTENT = "not suitable for input-matrix."
+SOLVER_ALIASES_PTRANS_I = [1, "1", "PTRANS-I", "ptrans-i"]
+SOLVER_ALIASES_PTRANS_II = [2, "2", "PTRANS-II", "ptrans-ii"]
 
 # === Tests ===
 
 
 @pytest.mark.parametrize("induce_error", [False, True])
-@pytest.mark.parametrize("solver_alias", [1])  # "1", "PTRANS-I"])
+@pytest.mark.parametrize(
+    "solver_alias", SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II
+)
 @pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"])
 @pytest.mark.parametrize("n_rhs", [None, 1, 10])
 @pytest.mark.parametrize("n_rows", N_ROWS)
@@ -72,17 +77,20 @@ def test_penta_solver1(
         ill_conditioned=False,
     )
 
-    # an error is induced by setting the first diagonal element to zero
+    # an error is induced by setting the first or last diagonal element to zero
     if induce_error:
         # the induction of the error is only possible if the matrix does not have
         # only 3 rows
         if n_rows == 3:
             pytest.skip(
                 "Only 3 rows, cannot induce error because this will not go into "
-                "PTRANS-I, but NumPy"
+                "PTRANS-I, but NumPy."
             )
 
-        mat_full[0, 0] = 0.0
+        if solver_alias in SOLVER_ALIASES_PTRANS_I:
+            mat_full[0, 0] = 0.0
+        else:
+            mat_full[n_rows - 1, n_rows - 1] = 0.0
 
     # the right-hand side is generated
     np.random.seed(SEED)
@@ -119,7 +127,7 @@ def test_penta_solver1(
     # Case 1: in case of an error, a warning has to be issued and the result has to
     # be NaN
     if induce_error:
-        with pytest.warns(UserWarning, match=REF_WARNING):
+        with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
             sol = pp.solve(
                 mat=mat,
                 rhs=rhs,

From 026e8eab05dfe8efffe3d0eff0348db5e2728c13 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 20:24:26 +0200
Subject: [PATCH 24/62] tests: [11] removed superficial unittest

---
 tests/test_pentapy.py | 115 ------------------------------------------
 1 file changed, 115 deletions(-)
 delete mode 100755 tests/test_pentapy.py

diff --git a/tests/test_pentapy.py b/tests/test_pentapy.py
deleted file mode 100755
index 558de5e..0000000
--- a/tests/test_pentapy.py
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-This is the unittest for pentapy.
-"""
-
-import unittest
-
-# import platform
-import warnings
-
-import numpy as np
-
-import pentapy as pp
-
-warnings.simplefilter("always")
-
-
-class TestPentapy(unittest.TestCase):
-    def setUp(self):
-        self.seed = 19031977
-        self.size = 1000
-        self.rand = np.random.RandomState(self.seed)
-        self.mat = (self.rand.rand(5, self.size) - 0.5) * 1e-5
-        self.rhs = self.rand.rand(self.size) * 1e5
-
-    def test_tools(self):
-        self.mat_int = np.zeros((100, 100), dtype=int)
-        # fill bands of pentadiagonal matrix
-        self.mat_int[pp.diag_indices(100, 0)] = self.rand.randint(1, 1000, size=100)
-        self.mat_int[pp.diag_indices(100, 1)] = self.rand.randint(1, 1000, size=99)
-        self.mat_int[pp.diag_indices(100, 2)] = self.rand.randint(1, 1000, size=98)
-        self.mat_int[pp.diag_indices(100, -1)] = self.rand.randint(1, 1000, size=99)
-        self.mat_int[pp.diag_indices(100, -2)] = self.rand.randint(1, 1000, size=98)
-        # create banded
-        self.mat_int_col = pp.create_banded(self.mat_int)
-        self.mat_int_row = pp.create_banded(self.mat_int, col_wise=False)
-        # create full
-        self.mat_int_col_ful = pp.create_full(self.mat_int_col, col_wise=True)
-        self.mat_int_row_ful = pp.create_full(self.mat_int_row, col_wise=False)
-        # shifting
-        self.mat_shift_cr = pp.shift_banded(self.mat_int_col)
-        self.mat_shift_rc = pp.shift_banded(self.mat_int_row, col_to_row=False)
-        # in place shifting
-        self.mat_int_col_ip = pp.create_banded(self.mat_int)
-        self.mat_int_row_ip = pp.create_banded(self.mat_int, col_wise=False)
-        pp.shift_banded(self.mat_int_col_ip, copy=False)
-        pp.shift_banded(self.mat_int_row_ip, copy=False, col_to_row=False)
-        # checking
-        self.assertEqual(np.sum(self.mat_int > 0), 494)
-        self.assertTrue(np.array_equal(self.mat_int_col, self.mat_shift_rc))
-        self.assertTrue(np.array_equal(self.mat_int_row, self.mat_shift_cr))
-        self.assertTrue(np.array_equal(self.mat_int_col, self.mat_int_row_ip))
-        self.assertTrue(np.array_equal(self.mat_int_row, self.mat_int_col_ip))
-        self.assertTrue(np.array_equal(self.mat_int, self.mat_int_col_ful))
-        self.assertTrue(np.array_equal(self.mat_int, self.mat_int_row_ful))
-
-    def test_solve1(self):
-        self.mat_col = pp.shift_banded(self.mat, col_to_row=False)
-        self.mat_ful = pp.create_full(self.mat, col_wise=False)
-
-        sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=1)
-        sol_col = pp.solve(
-            self.mat_col,
-            self.rhs,
-            is_flat=True,
-            index_row_wise=False,
-            solver=1,
-        )
-        sol_ful = pp.solve(self.mat_ful, self.rhs, solver=1)
-
-        diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs))
-        diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs))
-        diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs))
-
-        diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col)))
-        self.assertAlmostEqual(diff_row * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_col * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_ful * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_row_col * 1e5, 0.0)
-
-    def test_solve2(self):
-        self.mat_col = pp.shift_banded(self.mat, col_to_row=False)
-        self.mat_ful = pp.create_full(self.mat, col_wise=False)
-
-        sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=2)
-        sol_col = pp.solve(
-            self.mat_col,
-            self.rhs,
-            is_flat=True,
-            index_row_wise=False,
-            solver=2,
-        )
-        sol_ful = pp.solve(self.mat_ful, self.rhs, solver=2)
-
-        diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs))
-        diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs))
-        diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs))
-
-        diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col)))
-        self.assertAlmostEqual(diff_row * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_col * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_ful * 1e-5, 0.0)
-        self.assertAlmostEqual(diff_row_col * 1e5, 0.0)
-
-    def test_error(self):
-        self.err_mat = np.array(
-            [[3, 2, 1, 0], [-3, -2, 7, 1], [3, 2, -1, 5], [0, 1, 2, 3]]
-        )
-        self.err_rhs = np.array([6, 3, 9, 6])
-        sol_2 = pp.solve(self.err_mat, self.err_rhs, is_flat=False, solver=2)
-        diff_2 = np.max(np.abs(np.dot(self.err_mat, sol_2) - self.err_rhs))
-        self.assertAlmostEqual(diff_2, 0.0)
-
-
-if __name__ == "__main__":
-    unittest.main()

From f6fa4cbb28cb18b2d921bc4f6ad3243d539cdad0 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 20:25:26 +0200
Subject: [PATCH 25/62] misc: [11] fixed typos

---
 examples/README.rst  |  4 ++--
 paper/paper.md       |  6 +++---
 src/pentapy/core.py  | 12 ++++++------
 src/pentapy/tools.py | 22 +++++++++++-----------
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/examples/README.rst b/examples/README.rst
index ea7bac4..a1b7ae0 100644
--- a/examples/README.rst
+++ b/examples/README.rst
@@ -88,7 +88,7 @@ If M is a full matrix, you call the following:
 
     X = pp.solve(M, Y)
 
-If M is flattend in row-wise order you have to set the keyword argument ``is_flat=True``:
+If M is flattened in row-wise order you have to set the keyword argument ``is_flat=True``:
 
 .. code-block:: python
 
@@ -99,7 +99,7 @@ If M is flattend in row-wise order you have to set the keyword argument ``is_fla
 
     X = pp.solve(M, Y, is_flat=True)
 
-If you got a col-wise flattend matrix you have to set ``index_row_wise=False``:
+If you got a col-wise flattened matrix you have to set ``index_row_wise=False``:
 
 .. code-block:: python
 
diff --git a/paper/paper.md b/paper/paper.md
index 53c6f04..fa0fdc3 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -51,8 +51,8 @@ $$
 
 Here, $d_i$ are the diagonal entries and $d_i^{(j)}$ represent the $j$-th minor diagonal.
 
-Recently, @askar presented two algorithms to 
-solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``, 
+Recently, @askar presented two algorithms to
+solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``,
 applying first transformation to a triangular matrix and then, respectively, backward and forward substitution.
 ``pentapy`` provides Cython [@cython] implementations of these
 algorithms and a set of tools to convert matrices to row-wise or
@@ -73,7 +73,7 @@ The linear algebra solver of NumPy [@numpy] served as a standard reference, whic
 ``pentapy`` is designed to provide a fast solver for the special case of a
 pentadiagonal linear system. To the best of the author's knowledge,
 this package outperforms the current algorithms for solving pentadiagonal systems in Python.
-The solver can handle different input formats of the coefficient matrix, i.e., a flattend matrix or a
+The solver can handle different input formats of the coefficient matrix, i.e., a flattened matrix or a
 quadratic matrix.
 
 
diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index ccf0c8f..ad704a6 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -46,8 +46,8 @@ def solve(
     """
     Solver for a pentadiagonal system.
 
-    The matrix can be given as a full n x n matrix or as a flattend one.
-    The flattend matrix can be given in a row-wise flattend form::
+    The matrix can be given as a full n x n matrix or as a flattened one.
+    The flattened matrix can be given in a row-wise flattened form::
 
       [[Dup2[0]  Dup2[1]  Dup2[2]  ... Dup2[N-2]  0          0       ]
        [Dup1[0]  Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  0       ]
@@ -55,7 +55,7 @@ def solve(
        [0        Dlow1[1] Dlow1[2] ... Dlow1[N-2] Dlow1[N-1] Dlow1[N]]
        [0        0        Dlow2[2] ... Dlow2[N-2] Dlow2[N-2] Dlow2[N]]]
 
-    Or a column-wise flattend form::
+    Or a column-wise flattened form::
 
       [[0        0        Dup2[2]  ... Dup2[N-2]  Dup2[N-1]  Dup2[N] ]
        [0        Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  Dup1[N] ]
@@ -65,7 +65,7 @@ def solve(
 
     Dup1 and Dup2 are the first and second upper minor-diagonals
     and Dlow1 resp. Dlow2 are the lower ones.
-    If you provide a column-wise flattend matrix, you have to set::
+    If you provide a column-wise flattened matrix, you have to set::
 
       index_row_wise=False
 
@@ -77,9 +77,9 @@ def solve(
     rhs : :class:`numpy.ndarray` of shape (m,) or (m, n)
         The right hand side(s) of the equation system. Its shape is preserved.
     is_flat : :class:`bool`, default=False
-        State if the matrix is already flattend. Default: ``False``
+        State if the matrix is already flattened. Default: ``False``
     index_row_wise : :class:`bool`, default=True
-        State if the flattend matrix is row-wise flattend. Default: ``True``
+        State if the flattened matrix is row-wise flattened. Default: ``True``
     solver : :class:`int` or :class:`str`, default=1
         Which solver should be used. The following are provided:
 
diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py
index ca80d27..4bb0b52 100644
--- a/src/pentapy/tools.py
+++ b/src/pentapy/tools.py
@@ -52,8 +52,8 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
 
     Either from column-wise to row-wise storage or vice versa.
 
-    The Matrix has to be given as a flattend matrix.
-    Either in a column-wise flattend form::
+    The Matrix has to be given as a flattened matrix.
+    Either in a column-wise flattened form::
 
       [[0        0        Dup2[2]  ... Dup2[N-2]  Dup2[N-1]  Dup2[N] ]
        [0        Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  Dup1[N] ]
@@ -65,7 +65,7 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
 
       col_to_row=True
 
-    Or in a row-wise flattend form::
+    Or in a row-wise flattened form::
 
       [[Dup2[0]  Dup2[1]  Dup2[2]  ... Dup2[N-2]  0          0       ]
        [Dup1[0]  Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  0       ]
@@ -98,7 +98,7 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
     Returns
     -------
     :class:`numpy.ndarray`
-        Shifted bandend matrix
+        Shifted banded matrix
     """
     if copy:
         mat_flat = np.copy(mat)
@@ -124,8 +124,8 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
 def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
     """Create a banded matrix from a given quadratic Matrix.
 
-    The Matrix will to be returned as a flattend matrix.
-    Either in a column-wise flattend form::
+    The Matrix will be returned as a flattened matrix.
+    Either in a column-wise flattened form::
 
       [[0        0        Dup2[2]  ... Dup2[N-2]  Dup2[N-1]  Dup2[N] ]
        [0        Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  Dup1[N] ]
@@ -137,7 +137,7 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
 
       col_wise=True
 
-    Or in a row-wise flattend form::
+    Or in a row-wise flattened form::
 
       [[Dup2[0]  Dup2[1]  Dup2[2]  ... Dup2[N-2]  0          0       ]
        [Dup1[0]  Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  0       ]
@@ -168,7 +168,7 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
     Returns
     -------
     :class:`numpy.ndarray`
-        Bandend matrix
+        Banded matrix
     """
     mat = np.asanyarray(mat)
     if mat.ndim != 2:
@@ -202,8 +202,8 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
 def create_full(mat, up=2, low=2, col_wise=True):
     """Create a (n x n) Matrix from a given banded matrix.
 
-    The given Matrix has to be a flattend matrix.
-    Either in a column-wise flattend form::
+    The given Matrix has to be a flattened matrix.
+    Either in a column-wise flattened form::
 
       [[0        0        Dup2[2]  ... Dup2[N-2]  Dup2[N-1]  Dup2[N] ]
        [0        Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  Dup1[N] ]
@@ -215,7 +215,7 @@ def create_full(mat, up=2, low=2, col_wise=True):
 
       col_wise=True
 
-    Or in a row-wise flattend form::
+    Or in a row-wise flattened form::
 
       [[Dup2[0]  Dup2[1]  Dup2[2]  ... Dup2[N-2]  0          0       ]
        [Dup1[0]  Dup1[1]  Dup1[2]  ... Dup1[N-2]  Dup1[N-1]  0       ]

From 9b3e1224b205f50caec5fa136771680d952bab55 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 20:39:43 +0200
Subject: [PATCH 26/62] style/refactor: [11] made core code readable to humans
 and not only a machine

---
 src/pentapy/core.py | 55 +++++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index ad704a6..4158d94 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -101,6 +101,7 @@ def solve(
     # first, the solver is converted to the internal name to avoid confusion
     solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()]
 
+    # Case 1: the pentapy solvers
     if solver_inter in {
         pmodels.PentaSolverAliases.PTRANS_I,
         pmodels.PentaSolverAliases.PTRANS_II,
@@ -150,29 +151,14 @@ def solve(
             warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
             return np.full(shape=rhs_og_shape, fill_value=np.nan)
 
+    # Case 2: LAPACK's banded solver
     elif solver_inter == pmodels.PentaSolverAliases.LAPACK:  # pragma: no cover
         try:
             from scipy.linalg import solve_banded
         except ImportError as imp_err:  # pragma: no cover
             msg = "pentapy.solve: scipy.linalg.solve_banded could not be imported"
             raise ValueError(msg) from imp_err
-        if is_flat and index_row_wise:
-            mat_flat = np.array(mat)
-            ptools._check_penta(mat_flat)
-            ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
-        elif is_flat:
-            mat_flat = np.asarray(mat)
-        else:
-            mat_flat = ptools.create_banded(mat)
-        return solve_banded((2, 2), mat_flat, rhs)
 
-    elif solver_inter == pmodels.PentaSolverAliases.SUPER_LU:  # pragma: no cover
-        try:
-            from scipy import sparse as sps
-            from scipy.sparse.linalg import spsolve
-        except ImportError as imp_err:
-            msg = "pentapy.solve: scipy.sparse could not be imported"
-            raise ValueError(msg) from imp_err
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)
             ptools._check_penta(mat_flat)
@@ -181,17 +167,27 @@ def solve(
             mat_flat = np.asarray(mat)
         else:
             mat_flat = ptools.create_banded(mat)
-        size = mat_flat.shape[1]
-        M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
-        return spsolve(M, rhs, use_umfpack=False)
 
-    elif solver_inter == pmodels.PentaSolverAliases.UMFPACK:  # pragma: no cover
+        # NOTE: since this is a general banded solver, the number of sub- and super-
+        #       diagonals has to be provided
+        return solve_banded(
+            l_and_u=(2, 2),
+            ab=mat_flat,
+            b=rhs,
+        )
+
+    # Case 3: SciPy's sparse solver with or without UMFPACK
+    elif solver_inter in {
+        pmodels.PentaSolverAliases.SUPER_LU,
+        pmodels.PentaSolverAliases.UMFPACK,
+    }:
         try:
             from scipy import sparse as sps
             from scipy.sparse.linalg import spsolve
         except ImportError as imp_err:
             msg = "pentapy.solve: scipy.sparse could not be imported"
             raise ValueError(msg) from imp_err
+
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)
             ptools._check_penta(mat_flat)
@@ -200,9 +196,24 @@ def solve(
             mat_flat = np.asarray(mat)
         else:
             mat_flat = ptools.create_banded(mat)
+
+        # the solvers require a sparse left-hand side matrix, so this is created here
+        # NOTE: the UMFPACK solver will not be triggered for multiple right-hand sides
+        use_umfpack = solver_inter == pmodels.PentaSolverAliases.UMFPACK
         size = mat_flat.shape[1]
-        M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc")
-        return spsolve(M, rhs, use_umfpack=True)
+        M = sps.spdiags(
+            data=mat_flat,
+            diags=[2, 1, 0, -1, -2],
+            m=size,
+            n=size,
+            format="csc",
+        )
+
+        return spsolve(
+            A=M,
+            b=rhs,
+            use_umfpack=use_umfpack,
+        )
 
     else:  # pragma: no cover
         msg = f"pentapy.solve: unknown solver ({solver})"

From 92abbbdaca93083334917c7db42fe63e63ed6c23 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:27:50 +0200
Subject: [PATCH 27/62] feat/refactor: [11] ensured all solvers behave the same
 in terms of output and warning behaviour; tested it altogether

---
 src/pentapy/core.py            |  32 ++++++---
 tests/test_solvers_external.py | 121 +++++++++++++++++++++++++++++++++
 tests/test_solvers_internal.py |   2 +-
 3 files changed, 145 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_solvers_external.py

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 4158d94..c701573 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -142,10 +142,11 @@ def solve(
             )
 
             # if there was only a 1D right-hand side, the result has to be flattened
+            sol = solver_func(mat_flat, rhs)
             if single_rhs:
-                return solver_func(mat_flat, rhs).ravel()
+                sol = sol.ravel()
 
-            return solver_func(mat_flat, rhs)
+            return sol
 
         except ZeroDivisionError:
             warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
@@ -170,11 +171,17 @@ def solve(
 
         # NOTE: since this is a general banded solver, the number of sub- and super-
         #       diagonals has to be provided
-        return solve_banded(
-            l_and_u=(2, 2),
-            ab=mat_flat,
-            b=rhs,
-        )
+        # NOTE: LAPACK handles all the reshaping and flattening internally
+        try:
+            return solve_banded(
+                l_and_u=(2, 2),
+                ab=mat_flat,
+                b=rhs,
+            )
+
+        except np.linalg.LinAlgError:
+            warnings.warn("pentapy: LAPACK solver encountered singular matrix.")
+            return np.full(shape=rhs.shape, fill_value=np.nan)
 
     # Case 3: SciPy's sparse solver with or without UMFPACK
     elif solver_inter in {
@@ -184,7 +191,7 @@ def solve(
         try:
             from scipy import sparse as sps
             from scipy.sparse.linalg import spsolve
-        except ImportError as imp_err:
+        except ImportError as imp_err:  # pragma: no cover
             msg = "pentapy.solve: scipy.sparse could not be imported"
             raise ValueError(msg) from imp_err
 
@@ -209,12 +216,19 @@ def solve(
             format="csc",
         )
 
-        return spsolve(
+        sol = spsolve(
             A=M,
             b=rhs,
             use_umfpack=use_umfpack,
         )
 
+        # NOTE: spsolve flattens column-vectors, thus their shape has to be restored
+        # NOTE: it already fills the result vector with NaNs if the matrix is singular
+        if rhs.ndim == 2 and 1 in rhs.shape:
+            sol = sol[::, np.newaxis]
+
+        return sol
+
     else:  # pragma: no cover
         msg = f"pentapy.solve: unknown solver ({solver})"
         raise ValueError(msg)
diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py
new file mode 100644
index 0000000..075dfb4
--- /dev/null
+++ b/tests/test_solvers_external.py
@@ -0,0 +1,121 @@
+"""
+Test suite for testing the external solvers that can be called via pentapy. The tests
+are not exhaustive and only check whether the solvers can be called and return a
+solution.
+
+"""
+
+# === Imports ===
+
+from typing import Literal
+
+import numpy as np
+import pentapy as pp
+import pytest
+import util_funcs as uf
+
+# === Constants ===
+
+SEED = 19_031_977
+N_ROWS = [
+    3,
+    4,
+    5,
+    10,
+    11,
+    25,
+    26,
+    50,
+    51,
+]
+REF_WARNING_CONTENT = "singular"
+SOLVER_ALIASES_LAPACK = [3, "3", "lapack", "LaPaCk"]
+SOLVER_ALIASES_SPSOLVE = [4, "4", "spsolve", "SpSoLvE"]
+
+# === Tests ===
+
+
+@pytest.mark.parametrize("induce_error", [False, True])
+@pytest.mark.parametrize("solver_alias", SOLVER_ALIASES_LAPACK + SOLVER_ALIASES_SPSOLVE)
+@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"])
+@pytest.mark.parametrize("n_rhs", [None, 1, 10])
+@pytest.mark.parametrize("n_rows", N_ROWS)
+def test_external_solvers(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[1, "1", "PTRANS-I"],
+    induce_error: bool,
+) -> None:
+    """
+    Tests the external bindings for solving pentadiagonal systems starting from
+    different input layouts, number of right-hand sides, number of rows, and when an
+    error is induced by a zero matrix.
+    It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
+
+    """
+
+    # first, a random pentadiagonal matrix is generated
+    mat_full = np.zeros(shape=(n_rows, n_rows))
+    if not induce_error:
+        mat_full[::, ::] = uf.gen_conditioned_rand_penta_matrix_dense(
+            n_rows=n_rows,
+            seed=SEED,
+            ill_conditioned=False,
+        )
+
+    # the right-hand side is generated
+    np.random.seed(SEED)
+    if n_rhs is not None:
+        rhs = np.random.rand(n_rows, n_rhs)
+        result_shape = (n_rows, n_rhs)
+    else:
+        rhs = np.random.rand(n_rows)
+        result_shape = (n_rows,)
+
+    # the matrix is converted to the desired layout
+    if input_layout == "full":
+        mat = mat_full
+        kwargs = dict(is_flat=False)
+
+    elif input_layout == "banded_row_wise":
+        mat = pp.create_banded(mat_full, col_wise=False)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=True,
+        )
+
+    elif input_layout == "banded_col_wise":
+        mat = pp.create_banded(mat_full, col_wise=True)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=False,
+        )
+
+    else:
+        raise ValueError(f"Invalid input layout: {input_layout}")
+
+    # the solution is computed
+    # Case 1: in case of an error, a warning has to be issued and the result has to
+    # be NaN
+    if induce_error:
+        with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
+            sol = pp.solve(
+                mat=mat,
+                rhs=rhs,
+                solver=solver_alias,  # type: ignore
+                **kwargs,
+            )
+            assert sol.shape == result_shape
+            assert np.isnan(sol).all()
+
+        return
+
+    # Case 2: in case of no error, the solution can be computed without any issues
+    sol = pp.solve(
+        mat=mat,
+        rhs=rhs,
+        solver=solver_alias,  # type: ignore
+        **kwargs,
+    )
+    assert sol.shape == result_shape
diff --git a/tests/test_solvers_internal.py b/tests/test_solvers_internal.py
index 2b543cf..cdc55fa 100644
--- a/tests/test_solvers_internal.py
+++ b/tests/test_solvers_internal.py
@@ -55,7 +55,7 @@
 @pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"])
 @pytest.mark.parametrize("n_rhs", [None, 1, 10])
 @pytest.mark.parametrize("n_rows", N_ROWS)
-def test_penta_solver1(
+def test_pentapy_solvers(
     n_rows: int,
     n_rhs: int,
     input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],

From fb0cf2a342240e3b637774f2f3a8661424d1d515 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:28:23 +0200
Subject: [PATCH 28/62] fix: [11] fixed coverage typo

---
 src/pentapy/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pentapy/__init__.py b/src/pentapy/__init__.py
index 655c428..3705064 100644
--- a/src/pentapy/__init__.py
+++ b/src/pentapy/__init__.py
@@ -44,7 +44,7 @@
 
 try:
     from pentapy._version import __version__
-except ImportError:  # pragma: nocover
+except ImportError:  # pragma: no cover
     # package is not installed
     __version__ = "0.0.0.dev0"
 

From c2e8032007a172528adb596f667eb44aedc89c3b Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:28:50 +0200
Subject: [PATCH 29/62] lint: [11] linted cython files

---
 src/pentapy/solver.pyx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 1494a25..2da01db 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -104,9 +104,9 @@ cdef void c_penta_factorize_algo1(
     # === Variable declarations ===
 
     cdef int64_t iter_row
-    cdef double mu_i, ga_i, e_i # mu, gamma, e
-    cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha
-    cdef double be_i, be_i_minus_1, be_i_plus_1 # beta
+    cdef double mu_i, ga_i, e_i  # mu, gamma, e
+    cdef double al_i, al_i_minus_1, al_i_plus_1  # alpha
+    cdef double be_i, be_i_minus_1, be_i_plus_1  # beta
 
     # === Factorization ===
 
@@ -334,9 +334,9 @@ cdef void c_penta_factorize_algo2(
     # === Variable declarations ===
 
     cdef int64_t iter_row
-    cdef double ps_i, rho_i # psi, rho
-    cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma
-    cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi
+    cdef double ps_i, rho_i  # psi, rho
+    cdef double si_i, si_i_minus_1, si_i_plus_1  # sigma
+    cdef double phi_i, phi_i_minus_1, phi_i_plus_1  # phi
 
     # === Factorization ===
 
@@ -426,7 +426,7 @@ cdef void c_solve_penta_from_factorize_algo_2(
     # === Variable declarations ===
 
     cdef int64_t iter_row
-    cdef double om_i, om_i_minus_1, om_i_minus_2 # omega
+    cdef double om_i, om_i_minus_1, om_i_plus_1  # omega
 
     # === Transformation ===
 
@@ -487,4 +487,4 @@ cdef void c_solve_penta_from_factorize_algo_2(
         om_i_minus_1 = om_i
         om_i = result_view[iter_row]
 
-    return
\ No newline at end of file
+    return

From b4190f8144a24a122cf79c6724bb5ef938278dae Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:31:46 +0200
Subject: [PATCH 30/62] package: [11] made requirements file-based and dynamic
 to facilitate development without compromising on build

---
 pyproject.toml         | 33 ++++++---------------------------
 requirements/all.txt   |  2 ++
 requirements/base.txt  |  1 +
 requirements/check.txt |  4 ++++
 requirements/doc.txt   |  8 ++++++++
 requirements/scipy.txt |  1 +
 requirements/test.txt  |  4 ++++
 requirements/umfpack   |  1 +
 8 files changed, 27 insertions(+), 27 deletions(-)
 create mode 100644 requirements/all.txt
 create mode 100644 requirements/base.txt
 create mode 100644 requirements/check.txt
 create mode 100644 requirements/doc.txt
 create mode 100644 requirements/scipy.txt
 create mode 100644 requirements/test.txt
 create mode 100644 requirements/umfpack

diff --git a/pyproject.toml b/pyproject.toml
index 4c400f8..b26ae54 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ name = "pentapy"
 authors = [{name = "Sebastian Müller", email = "info@geostat-framework.org"}]
 readme = "README.md"
 license = {text = "MIT"}
-dynamic = ["version"]
+dynamic = ["version", "dependencies", "optional-dependencies"]
 description = "pentapy: A toolbox for pentadiagonal matrizes."
 classifiers = [
     "Development Status :: 5 - Production/Stable",
@@ -35,32 +35,10 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Topic :: Utilities",
 ]
-dependencies = ["numpy>=1.20.0"]
 
-[project.optional-dependencies]
-scipy = ["scipy"]
-umfpack = ["scikit-umfpack"]
-all = [
-    "scipy",
-    "scikit-umfpack",
-]
-doc = [
-    "m2r2>=0.2.8",
-    "scipy>=1.1.0",
-    "matplotlib>=3",
-    "perfplot<0.9",
-    "numpydoc>=1.1",
-    "sphinx>=7",
-    "sphinx-gallery>=0.8",
-    "sphinx-rtd-theme>=2",
-]
-test = ["pytest-cov>=3"]
-check = [
-  "black>=24,<25",
-  "isort[colors]",
-  "pylint",
-  "cython-lint",
-]
+[tool.setuptools.dynamic]
+dependencies = {file = ["requirements/base.txt"]}
+optional-dependencies = {scipy = {file = ["requirements/scipy.txt"]}, umfpack = {file = ["requirements/umfpack.txt"]}, all = {file = ["requirements/all.txt"]}, doc = {file = ["requirements/doc.txt"]}, test = {file = ["requirements/test.txt"]}, check = {file = ["requirements/check.txt"]}}
 
 [project.urls]
 Homepage = "https://github.com/GeoStat-Framework/pentapy"
@@ -103,7 +81,8 @@ max-line-length = 120
         "*examples*",
         "*tests*",
         "*paper*",
-        "pentapy/src/pentapy/_version.py",
+        "src/pentapy/_version.py",
+        "src/pentapy/__init__.py",
     ]
 
     [tool.coverage.report]
diff --git a/requirements/all.txt b/requirements/all.txt
new file mode 100644
index 0000000..be8d325
--- /dev/null
+++ b/requirements/all.txt
@@ -0,0 +1,2 @@
+scikit-umfpack
+scipy
\ No newline at end of file
diff --git a/requirements/base.txt b/requirements/base.txt
new file mode 100644
index 0000000..19b3787
--- /dev/null
+++ b/requirements/base.txt
@@ -0,0 +1 @@
+numpy>=1.20.0
\ No newline at end of file
diff --git a/requirements/check.txt b/requirements/check.txt
new file mode 100644
index 0000000..4af46fc
--- /dev/null
+++ b/requirements/check.txt
@@ -0,0 +1,4 @@
+black>=24,<25
+isort[colors]
+pylint
+cython-lint
\ No newline at end of file
diff --git a/requirements/doc.txt b/requirements/doc.txt
new file mode 100644
index 0000000..c49be85
--- /dev/null
+++ b/requirements/doc.txt
@@ -0,0 +1,8 @@
+m2r2>=0.2.8
+scipy>=1.1.0
+matplotlib>=3
+perfplot<0.9
+numpydoc>=1.1
+sphinx>=7
+sphinx-gallery>=0.8
+sphinx-rtd-theme>=2
\ No newline at end of file
diff --git a/requirements/scipy.txt b/requirements/scipy.txt
new file mode 100644
index 0000000..9c61c73
--- /dev/null
+++ b/requirements/scipy.txt
@@ -0,0 +1 @@
+scipy
\ No newline at end of file
diff --git a/requirements/test.txt b/requirements/test.txt
new file mode 100644
index 0000000..2f8c0c7
--- /dev/null
+++ b/requirements/test.txt
@@ -0,0 +1,4 @@
+pytest>=8
+pytest-cov>=3
+pytest-xdist>=3
+scipy>=1.1.0
\ No newline at end of file
diff --git a/requirements/umfpack b/requirements/umfpack
new file mode 100644
index 0000000..a8630c1
--- /dev/null
+++ b/requirements/umfpack
@@ -0,0 +1 @@
+scikit-umfpack
\ No newline at end of file

From 4c1e3a0827ec42a78a31be65d4aba32077c3d7b8 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:32:11 +0200
Subject: [PATCH 31/62] feat: [11] updated changelog

---
 CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1417c4c..b197caf 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,39 @@
 
 All notable changes to **pentapy** will be documented in this file.
 
+## [1.4.0] - 2024-06
+
+See [#22](https://github.com/GeoStat-Framework/pentapy/pull/22)
+
+### Enhancements
+
+- added support for multiple right-hand sides (currently serial)
+- improved error handling and added debug information to error messages
+
+### Changes
+
+- shotgun refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support
+- fully typed the function ``pentapy.solve``
+- made internal solver alias handling of ``pentapy.solve`` smarter, more robust, and removed all duplicate code
+- gave all solvers a consistent interface
+- made code in ``pentapy.core`` more human-readable and maintainable and added comments
+- fixed typos in documentation
+
+### Bugfixes
+
+- fixed error handling in case of zero-division to trigger dead error handling branch (see [Issue 23](https://github.com/GeoStat-Framework/pentapy/issues/23))
+- fixed edge case error for row/column of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24))
+
+### Tests
+
+- transitioned from ``unittest``-based testing to fully ``pytest``-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25))
+- made actual tests more meaningful by comparing them to LAPACK as reference standard (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25))
+- included external solver bindings accessible via ``pentapy.solve`` as part of the test suite
+- increased true coverage (not line-hit coverage) close to 100%
+
+### Packaging
+
+- made dependency specification file-based and dynamic
 
 ## [1.3.0] - 2024-04
 
@@ -100,6 +133,7 @@ This is the first release of pentapy, a python toolbox for solving pentadiagonal
 The solver is implemented in cython, which makes it really fast.
 
 
+[1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0
 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0
 [1.2.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.2...v1.2.0
 [1.1.2]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.1...v1.1.2

From b4f79e5fd3b73e666c738d3756e223ef952930a0 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 21:53:28 +0200
Subject: [PATCH 32/62] fix: [11] fixed wrong coverage exclude

---
 src/pentapy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index c701573..456155e 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -153,7 +153,7 @@ def solve(
             return np.full(shape=rhs_og_shape, fill_value=np.nan)
 
     # Case 2: LAPACK's banded solver
-    elif solver_inter == pmodels.PentaSolverAliases.LAPACK:  # pragma: no cover
+    elif solver_inter == pmodels.PentaSolverAliases.LAPACK:
         try:
             from scipy.linalg import solve_banded
         except ImportError as imp_err:  # pragma: no cover

From a2edcb4d7d458e6ffd2cc095d42312b4095032fc Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 22:34:59 +0200
Subject: [PATCH 33/62] doc: [11] improved wording for preserving shape

---
 src/pentapy/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 456155e..56e2a54 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -75,7 +75,8 @@ def solve(
     mat : :class:`numpy.ndarray` of shape (m, m) or (5, m)
         The full or flattened version of the pentadiagonal matrix.
     rhs : :class:`numpy.ndarray` of shape (m,) or (m, n)
-        The right hand side(s) of the equation system. Its shape is preserved.
+        The right hand side(s) of the equation system. Its shape determines the shape
+        of the output as they will be identical.
     is_flat : :class:`bool`, default=False
         State if the matrix is already flattened. Default: ``False``
     index_row_wise : :class:`bool`, default=True

From 007bc5a0f794bcabe6edab86b2f10038ce84a4d9 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sat, 8 Jun 2024 22:36:04 +0200
Subject: [PATCH 34/62] fix: [11] fixed changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b197caf..d13b435 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ All notable changes to **pentapy** will be documented in this file.
 
 ## [1.4.0] - 2024-06
 
-See [#22](https://github.com/GeoStat-Framework/pentapy/pull/22)
+See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26)
 
 ### Enhancements
 

From 997d372162352ccdd8e61e54101a1acc4e6f10ad Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 16:27:14 +0200
Subject: [PATCH 35/62] feat: [11] enable multi-threaded parallelism for
 multiple right-hand sides

---
 requirements/base.txt  |   3 +-
 src/pentapy/core.py    |  38 +++-
 src/pentapy/solver.pxd |  12 +-
 src/pentapy/solver.pyx | 489 ++++++++++++++++++++++++-----------------
 4 files changed, 333 insertions(+), 209 deletions(-)

diff --git a/requirements/base.txt b/requirements/base.txt
index 19b3787..0e77631 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -1 +1,2 @@
-numpy>=1.20.0
\ No newline at end of file
+numpy>=1.20.0
+psutil>=5.8.0
\ No newline at end of file
diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 56e2a54..87761a4 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -8,6 +8,7 @@
 from typing import Literal
 
 import numpy as np
+import psutil
 
 from pentapy import _models as pmodels
 from pentapy import solver as psolver  # type: ignore
@@ -42,6 +43,7 @@ def solve(
         "umf",
         "umf_pack",
     ] = 1,
+    workers: int = 1,
 ) -> np.ndarray:
     """
     Solver for a pentadiagonal system.
@@ -91,6 +93,11 @@ def solve(
             * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
 
         Strings are not case-sensitive.
+    workers : :class:`int`, default=1
+        Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel
+        processing of multiple right-hand sides. Parallelisation overhead can be
+        significant for small systems. If set to ``-1``, the number of workers is
+        automatically determined. Default: ``1``
 
     Returns
     -------
@@ -107,16 +114,18 @@ def solve(
         pmodels.PentaSolverAliases.PTRANS_I,
         pmodels.PentaSolverAliases.PTRANS_II,
     }:
+        # the matrix is checked and shifted if necessary ...
         if is_flat and index_row_wise:
             mat_flat = np.asarray(mat, dtype=np.double)
             ptools._check_penta(mat_flat)
         elif is_flat:
-            mat_flat = np.array(mat, dtype=np.double)
+            mat_flat = np.asarray(mat, dtype=np.double)
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, copy=False)
         else:
             mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
 
+        # ... followed by the conversion of the right-hand side
         rhs = np.asarray(rhs, dtype=np.double)
 
         # Special case: Early exit when the matrix has only 3 rows/columns
@@ -128,6 +137,23 @@ def solve(
                 b=rhs,
             )
 
+        # now, the number of workers for multithreading has to be determined if
+        # necessary
+        # NOTE: the following does not count the number of total threads, but the number
+        #       of threads available for the solver
+        if workers < -1:
+            raise ValueError(
+                f"pentapy.solve: workers has to be -1 or greater, not {workers=}"
+            )
+
+        if workers == -1:
+            proc = psutil.Process()
+            workers = len(proc.cpu_affinity())  # type: ignore
+            del proc
+
+        elif workers == 0:
+            workers = 1
+
         # if there is only a single right-hand side, it has to be reshaped to a 2D array
         # NOTE: this has to be reverted at the end
         single_rhs = rhs.ndim == 1
@@ -143,7 +169,11 @@ def solve(
             )
 
             # if there was only a 1D right-hand side, the result has to be flattened
-            sol = solver_func(mat_flat, rhs)
+            sol = solver_func(
+                np.ascontiguousarray(mat_flat),
+                np.ascontiguousarray(rhs),
+                workers,
+            )
             if single_rhs:
                 sol = sol.ravel()
 
@@ -162,7 +192,7 @@ def solve(
             raise ValueError(msg) from imp_err
 
         if is_flat and index_row_wise:
-            mat_flat = np.array(mat)
+            mat_flat = np.asarray(mat)
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
@@ -197,7 +227,7 @@ def solve(
             raise ValueError(msg) from imp_err
 
         if is_flat and index_row_wise:
-            mat_flat = np.array(mat)
+            mat_flat = np.asarray(mat)
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index b16f8a0..4fe6c1e 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -1,4 +1,12 @@
 # cython: language_level=3
-cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs)
+cdef double[::, ::1] c_penta_solver1(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+)
 
-cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs)
+cdef double[::, ::1] c_penta_solver2(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+)
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 2da01db..fc2a9a5 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -11,24 +11,50 @@ implemented in Cython.
 import numpy as np
 
 cimport numpy as np
+from cython.parallel import prange
 from libc.stdint cimport int64_t
 
 
+cdef enum: MAT_FACT_N_COLS = 5
+
 # === Main Python Interface ===
 
 
-def penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs):
-    return np.asarray(c_penta_solver1(mat_flat, rhs))
+def penta_solver1(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+):
+    return np.asarray(
+        c_penta_solver1(
+            mat_flat,
+            rhs,
+            workers,
+        )
+    )
 
 
-def penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs):
-    return np.asarray(c_penta_solver2(mat_flat, rhs))
+def penta_solver2(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+):
+    return np.asarray(
+        c_penta_solver2(
+            mat_flat,
+            rhs,
+            workers,
+        )
+    )
 
 
 # === Solver Algorithm 1 ===
 
-
-cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs):
+cdef double[::, ::1] c_penta_solver1(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+):
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
     the right-hand side ``b`` by
@@ -42,37 +68,43 @@ cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs)
 
     # === Variable declarations ===
 
-    cdef int64_t mat_n_rows = mat_flat.shape[1]
+    cdef int64_t mat_n_cols = mat_flat.shape[1]
     cdef int64_t rhs_n_cols = rhs.shape[1]
     cdef int64_t iter_col
-    cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols))
-    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5))
+
+    cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols))
+    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, MAT_FACT_N_COLS))
 
     # === Solving the system of equations ===
 
     # first, the matrix is factorized
     c_penta_factorize_algo1(
-        mat_flat,
-        mat_n_rows,
-        mat_factorized,
+        &mat_flat[0, 0],
+        mat_n_cols,
+        &mat_factorized[0, 0],
     )
 
     # then, all the right-hand sides are solved
-    for iter_col in range(rhs_n_cols):
+    for iter_col in prange(
+        rhs_n_cols,
+        nogil=True,
+        num_threads=workers,
+    ):
         c_solve_penta_from_factorize_algo_1(
-            mat_n_rows,
-            mat_factorized,
-            rhs[::, iter_col],
-            result[::, iter_col],
+            mat_n_cols,
+            &mat_factorized[0, 0],
+            &rhs[0, iter_col],
+            rhs_n_cols,
+            &result[0, iter_col],
         )
 
     return result
 
 
 cdef void c_penta_factorize_algo1(
-    double[::, ::] mat_flat,
-    int64_t mat_n_rows,
-    double[::, ::1] mat_factorized,
+    double* mat_flat,
+    int64_t mat_n_cols,
+    double* mat_factorized,
 ):
     """
     Factorizes the pentadiagonal matrix ``A`` into
@@ -103,7 +135,11 @@ cdef void c_penta_factorize_algo1(
 
     # === Variable declarations ===
 
-    cdef int64_t iter_row
+    cdef int64_t iter_row, fact_curr_base_idx
+    cdef int64_t mat_row_base_idx_1 = mat_n_cols  # base index for the second row
+    cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols  # base index for the third row
+    cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols  # base index for the fourth row
+    cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols  # base index for the fifth row
     cdef double mu_i, ga_i, e_i  # mu, gamma, e
     cdef double al_i, al_i_minus_1, al_i_plus_1  # alpha
     cdef double be_i, be_i_minus_1, be_i_plus_1  # beta
@@ -111,80 +147,84 @@ cdef void c_penta_factorize_algo1(
     # === Factorization ===
 
     # First row
-    mu_i = mat_flat[2, 0]
-    al_i_minus_1 = mat_flat[1, 0] / mu_i
-    be_i_minus_1 = mat_flat[0, 0] / mu_i
+    mu_i = mat_flat[mat_row_base_idx_2]
+    al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i
+    be_i_minus_1 = mat_flat[0] / mu_i
 
-    mat_factorized[0, 0] = 0.0
-    mat_factorized[0, 1] = mu_i
-    mat_factorized[0, 2] = 0.0
-    mat_factorized[0, 3] = al_i_minus_1
-    mat_factorized[0, 4] = be_i_minus_1
+    mat_factorized[0] = 0.0
+    mat_factorized[1] = mu_i
+    mat_factorized[2] = 0.0
+    mat_factorized[3] = al_i_minus_1
+    mat_factorized[4] = be_i_minus_1
 
     # Second row
-    ga_i = mat_flat[3, 1]
-    mu_i = mat_flat[2, 1] - al_i_minus_1 * ga_i
-    al_i = (mat_flat[1, 1] - be_i_minus_1 * ga_i) / mu_i
-    be_i = mat_flat[0, 1] / mu_i
+    ga_i = mat_flat[mat_row_base_idx_3 + 1]
+    mu_i = mat_flat[mat_row_base_idx_2 + 1] - al_i_minus_1 * ga_i
+    al_i = (mat_flat[mat_row_base_idx_1 + 1] - be_i_minus_1 * ga_i) / mu_i
+    be_i = mat_flat[1] / mu_i
 
-    mat_factorized[1, 0] = 0.0
-    mat_factorized[1, 1] = mu_i
-    mat_factorized[1, 2] = ga_i
-    mat_factorized[1, 3] = al_i
-    mat_factorized[1, 4] = be_i
+    mat_factorized[5] = 0.0
+    mat_factorized[6] = mu_i
+    mat_factorized[7] = ga_i
+    mat_factorized[8] = al_i
+    mat_factorized[9] = be_i
 
     # Central rows
-    for iter_row in range(2, mat_n_rows - 2):
-        e_i = mat_flat[4, iter_row]
-        ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i
-        mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i
+    fact_curr_base_idx = 10
+    for iter_row in range(2, mat_n_cols-2):
+        e_i = mat_flat[mat_row_base_idx_4 + iter_row]
+        ga_i = mat_flat[mat_row_base_idx_3 + iter_row] - al_i_minus_1 * e_i
+        mu_i = mat_flat[mat_row_base_idx_2 + iter_row] - be_i_minus_1 * e_i - al_i * ga_i
 
-        al_i_plus_1 = (mat_flat[1, iter_row] - be_i * ga_i) / mu_i
+        al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + iter_row] - be_i * ga_i) / mu_i
         al_i_minus_1 = al_i
         al_i = al_i_plus_1
 
-        be_i_plus_1 = mat_flat[0, iter_row] / mu_i
+        be_i_plus_1 = mat_flat[iter_row] / mu_i
         be_i_minus_1 = be_i
         be_i = be_i_plus_1
 
-        mat_factorized[iter_row, 0] = e_i
-        mat_factorized[iter_row, 1] = mu_i
-        mat_factorized[iter_row, 2] = ga_i
-        mat_factorized[iter_row, 3] = al_i
-        mat_factorized[iter_row, 4] = be_i
+        mat_factorized[fact_curr_base_idx] = e_i
+        mat_factorized[fact_curr_base_idx + 1] = mu_i
+        mat_factorized[fact_curr_base_idx + 2] = ga_i
+        mat_factorized[fact_curr_base_idx + 3] = al_i
+        mat_factorized[fact_curr_base_idx + 4] = be_i
+
+        fact_curr_base_idx += MAT_FACT_N_COLS
 
     # Second to last row
-    e_i = mat_flat[4, mat_n_rows - 2]
-    ga_i = mat_flat[3, mat_n_rows - 2] - al_i_minus_1 * e_i
-    mu_i = mat_flat[2, mat_n_rows - 2] - be_i_minus_1 * e_i - al_i * ga_i
-    al_i_plus_1 = (mat_flat[1, mat_n_rows - 2] - be_i * ga_i) / mu_i
+    e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2]
+    ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - al_i_minus_1 * e_i
+    mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - be_i_minus_1 * e_i - al_i * ga_i
+    al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i
 
-    mat_factorized[mat_n_rows - 2, 0] = e_i
-    mat_factorized[mat_n_rows - 2, 1] = mu_i
-    mat_factorized[mat_n_rows - 2, 2] = ga_i
-    mat_factorized[mat_n_rows - 2, 3] = al_i_plus_1
-    mat_factorized[mat_n_rows - 2, 4] = 0.0
+    mat_factorized[fact_curr_base_idx] = e_i
+    mat_factorized[fact_curr_base_idx + 1] = mu_i
+    mat_factorized[fact_curr_base_idx + 2] = ga_i
+    mat_factorized[fact_curr_base_idx + 3] = al_i_plus_1
+    mat_factorized[fact_curr_base_idx + 4] = 0.0
 
     # Last Row
-    e_i = mat_flat[4, mat_n_rows - 1]
-    ga_i = mat_flat[3, mat_n_rows - 1] - al_i * e_i
-    mu_i = mat_flat[2, mat_n_rows - 1] - be_i * e_i - al_i_plus_1 * ga_i
+    e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1]
+    ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] - al_i * e_i
+    mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] - be_i * e_i - al_i_plus_1 * ga_i
 
-    mat_factorized[mat_n_rows - 1, 0] = e_i
-    mat_factorized[mat_n_rows - 1, 1] = mu_i
-    mat_factorized[mat_n_rows - 1, 2] = ga_i
-    mat_factorized[mat_n_rows - 1, 3] = 0.0
-    mat_factorized[mat_n_rows - 1, 4] = 0.0
+    mat_factorized[fact_curr_base_idx + 5] = e_i
+    mat_factorized[fact_curr_base_idx + 6] = mu_i
+    mat_factorized[fact_curr_base_idx + 7] = ga_i
+    mat_factorized[fact_curr_base_idx + 8] = 0.0
+    mat_factorized[fact_curr_base_idx + 9] = 0.0
 
     return
 
 
-cdef void c_solve_penta_from_factorize_algo_1(
-    int64_t mat_n_rows,
-    double[::, ::1] mat_factorized,
-    double[::] rhs_single,
-    double[::] result_view,
-):
+cdef int c_solve_penta_from_factorize_algo_1(
+    int64_t mat_n_cols,
+    double* mat_factorized,
+    double* rhs_single,
+    int64_t rhs_n_cols,
+    double* result_view,
+) except * nogil:
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the factorized
     unit upper triangular matrix ``U`` and the right-hand side ``b``.
@@ -195,72 +235,84 @@ cdef void c_solve_penta_from_factorize_algo_1(
 
     # === Variable declarations ===
 
-    cdef int64_t iter_row
-    cdef double ze_i, ze_i_minus_1, ze_i_plus_1
+    cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
+    cdef double ze_i, ze_i_minus_1, ze_i_plus_1  # zeta
 
     # === Transformation ===
 
     # first, the right-hand side is transformed into the vector ``zeta``
     # First row
 
-    ze_i_minus_1 = rhs_single[0] / mat_factorized[0, 1]
+    ze_i_minus_1 = rhs_single[0] / mat_factorized[1]
     result_view[0] = ze_i_minus_1
 
     # Second row
-    ze_i = (rhs_single[1] - ze_i_minus_1 * mat_factorized[1, 2]) / mat_factorized[1, 1]
-    result_view[1] = ze_i
+    ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[7]) / mat_factorized[6]
+    result_view[rhs_n_cols] = ze_i
 
     # Central rows
-    for iter_row in range(2, mat_n_rows - 2):
+    fact_curr_base_idx = 10
+    res_curr_base_idx = rhs_n_cols + rhs_n_cols
+
+    for iter_row in range(2, mat_n_cols-2):
         ze_i_plus_1 = (
-            rhs_single[iter_row]
-            - ze_i_minus_1 * mat_factorized[iter_row, 0]
-            - ze_i * mat_factorized[iter_row, 2]
-        ) / mat_factorized[iter_row, 1]
+            rhs_single[res_curr_base_idx]
+            - ze_i_minus_1 * mat_factorized[fact_curr_base_idx]
+            - ze_i * mat_factorized[fact_curr_base_idx + 2]
+        ) / mat_factorized[fact_curr_base_idx + 1]
         ze_i_minus_1 = ze_i
         ze_i = ze_i_plus_1
-        result_view[iter_row] = ze_i_plus_1
+        result_view[res_curr_base_idx] = ze_i_plus_1
+
+        fact_curr_base_idx += MAT_FACT_N_COLS
+        res_curr_base_idx += rhs_n_cols
 
     # Second to last row
     ze_i_plus_1 = (
-        rhs_single[mat_n_rows - 2]
-        - ze_i_minus_1 * mat_factorized[mat_n_rows - 2, 0]
-        - ze_i * mat_factorized[mat_n_rows - 2, 2]
-    ) / mat_factorized[mat_n_rows - 2, 1]
+        rhs_single[res_curr_base_idx]
+        - ze_i_minus_1 * mat_factorized[fact_curr_base_idx]
+        - ze_i * mat_factorized[fact_curr_base_idx + 2]
+    ) / mat_factorized[fact_curr_base_idx + 1]
     ze_i_minus_1 = ze_i
     ze_i = ze_i_plus_1
-    result_view[mat_n_rows - 2] = ze_i_plus_1
+    result_view[res_curr_base_idx] = ze_i_plus_1
 
     # Last row
     ze_i_plus_1 = (
-        rhs_single[mat_n_rows - 1]
-        - ze_i_minus_1 * mat_factorized[mat_n_rows - 1, 0]
-        - ze_i * mat_factorized[mat_n_rows - 1, 2]
-    ) / mat_factorized[mat_n_rows - 1, 1]
-    result_view[mat_n_rows - 1] = ze_i_plus_1
+        rhs_single[res_curr_base_idx + rhs_n_cols]
+        - ze_i_minus_1 * mat_factorized[fact_curr_base_idx + 5]
+        - ze_i * mat_factorized[fact_curr_base_idx + 7]
+    ) / mat_factorized[fact_curr_base_idx + 6]
+    result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1
 
     # === Backward substitution ===
 
     # The solution vector is calculated by backward substitution that overwrites the
     # right-hand side vector with the solution vector
-    ze_i -= mat_factorized[mat_n_rows - 2, 3] * ze_i_plus_1
-    result_view[mat_n_rows - 2] = ze_i
+    ze_i -= mat_factorized[fact_curr_base_idx + 3] * ze_i_plus_1
+    result_view[res_curr_base_idx] = ze_i
+
+    for iter_row in range(mat_n_cols-3, -1, -1):
+        fact_curr_base_idx -= MAT_FACT_N_COLS
+        res_curr_base_idx -= rhs_n_cols
 
-    for iter_row in range(mat_n_rows - 3, -1, -1):
-        result_view[iter_row] -= (
-            mat_factorized[iter_row, 3] * ze_i
-            + mat_factorized[iter_row, 4] * ze_i_plus_1
+        result_view[res_curr_base_idx] -= (
+            mat_factorized[fact_curr_base_idx + 3] * ze_i
+            + mat_factorized[fact_curr_base_idx + 4] * ze_i_plus_1
         )
         ze_i_plus_1 = ze_i
-        ze_i = result_view[iter_row]
-
-    return
+        ze_i = result_view[res_curr_base_idx]
 
+    return 0
 
 # === Solver Algorithm 2 ===
 
 
-cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs):
+cdef double[::, ::1] c_penta_solver2(
+    double[::, ::1] mat_flat,
+    double[::, ::1] rhs,
+    int workers,
+):
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
     the right-hand side ``b`` by
@@ -272,36 +324,44 @@ cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs)
 
     """
 
-    # Variable declarations
+    # === Variable declarations ===
 
-    cdef int64_t mat_n_rows = mat_flat.shape[1]
+    cdef int64_t mat_n_cols = mat_flat.shape[1]
     cdef int64_t rhs_n_cols = rhs.shape[1]
     cdef int64_t iter_col
-    cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols))
-    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5))
+
+    cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols))
+    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, 5))
+
+    # === Solving the system of equations ===
 
     # first, the matrix is factorized
     c_penta_factorize_algo2(
-        mat_flat,
-        mat_n_rows,
-        mat_factorized,
+        &mat_flat[0, 0],
+        mat_n_cols,
+        &mat_factorized[0, 0],
     )
 
     # then, all the right-hand sides are solved
-    for iter_col in range(rhs_n_cols):
+    for iter_col in prange(
+        rhs_n_cols,
+        nogil=True,
+        num_threads=workers,
+    ):
         c_solve_penta_from_factorize_algo_2(
-            mat_n_rows,
-            mat_factorized,
-            rhs[::, iter_col],
-            result[::, iter_col],
+            mat_n_cols,
+            &mat_factorized[0, 0],
+            &rhs[0, iter_col],
+            rhs_n_cols,
+            &result[0, iter_col],
         )
 
     return result
 
 cdef void c_penta_factorize_algo2(
-    double[::, ::] mat_flat,
-    int64_t mat_n_rows,
-    double[::, ::1] mat_factorized,
+    double* mat_flat,
+    int64_t mat_n_cols,
+    double* mat_factorized,
 ):
     """
     Factorizes the pentadiagonal matrix ``A`` into
@@ -333,7 +393,11 @@ cdef void c_penta_factorize_algo2(
 
     # === Variable declarations ===
 
-    cdef int64_t iter_row
+    cdef int64_t iter_row, fact_curr_base_idx
+    cdef int64_t mat_row_base_idx_1 = mat_n_cols  # base index for the second row
+    cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols  # base index for the third row
+    cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols  # base index for the fourth row
+    cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols  # base index for the fifth row
     cdef double ps_i, rho_i  # psi, rho
     cdef double si_i, si_i_minus_1, si_i_plus_1  # sigma
     cdef double phi_i, phi_i_minus_1, phi_i_plus_1  # phi
@@ -341,80 +405,86 @@ cdef void c_penta_factorize_algo2(
     # === Factorization ===
 
     # First row
-    ps_i = mat_flat[2, mat_n_rows - 1]
-    si_i_plus_1 = mat_flat[3, mat_n_rows - 1] / ps_i
-    phi_i_plus_1 = mat_flat[4, mat_n_rows - 1] / ps_i
 
-    mat_factorized[mat_n_rows - 1, 0] = phi_i_plus_1
-    mat_factorized[mat_n_rows - 1, 1] = si_i_plus_1
-    mat_factorized[mat_n_rows - 1, 2] = ps_i
-    mat_factorized[mat_n_rows - 1, 3] = 0.0
-    mat_factorized[mat_n_rows - 1, 4] = 0.0
+    ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1]
+    si_i_plus_1 = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] / ps_i
+    phi_i_plus_1 = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] / ps_i
 
-    # Second row
-    rho_i = mat_flat[1, mat_n_rows-2]
-    ps_i = mat_flat[2, mat_n_rows-2] - si_i_plus_1 * rho_i
-    si_i = (mat_flat[3, mat_n_rows-2] - phi_i_plus_1 * rho_i) / ps_i
-    phi_i = mat_flat[4, mat_n_rows-2] / ps_i
+    fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS
+    mat_factorized[fact_curr_base_idx + 4] = 0.0
+    mat_factorized[fact_curr_base_idx + 3] = 0.0
+    mat_factorized[fact_curr_base_idx + 2] = ps_i
+    mat_factorized[fact_curr_base_idx + 1] = si_i_plus_1
+    mat_factorized[fact_curr_base_idx] = phi_i_plus_1
 
-    mat_factorized[mat_n_rows - 2, 0] = phi_i
-    mat_factorized[mat_n_rows - 2, 1] = si_i
-    mat_factorized[mat_n_rows - 2, 2] = ps_i
-    mat_factorized[mat_n_rows - 2, 3] = rho_i
-    mat_factorized[mat_n_rows - 2, 4] = 0.0
+    # Second row
+    rho_i = mat_flat[mat_row_base_idx_1 + mat_n_cols - 2]
+    ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - si_i_plus_1 * rho_i
+    si_i = (mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - phi_i_plus_1 * rho_i) / ps_i
+    phi_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] / ps_i
+
+    fact_curr_base_idx -= MAT_FACT_N_COLS
+    mat_factorized[fact_curr_base_idx + 4] = 0.0
+    mat_factorized[fact_curr_base_idx + 3] = rho_i
+    mat_factorized[fact_curr_base_idx + 2] = ps_i
+    mat_factorized[fact_curr_base_idx + 1] = si_i
+    mat_factorized[fact_curr_base_idx] = phi_i
 
     # Central rows
-    for iter_row in range(mat_n_rows-3, 1, -1):
-        b_i = mat_flat[0, iter_row]
-        rho_i = mat_flat[1, iter_row] - si_i_plus_1 * b_i
-        ps_i = mat_flat[2, iter_row] - phi_i_plus_1 * b_i - si_i * rho_i
-        si_i_minus_1 = (mat_flat[3, iter_row] - phi_i * rho_i) / ps_i
+    for iter_row in range(mat_n_cols - 3, 1, -1):
+        b_i = mat_flat[iter_row]
+        rho_i = mat_flat[mat_row_base_idx_1 + iter_row] - si_i_plus_1 * b_i
+        ps_i = mat_flat[mat_row_base_idx_2 + iter_row] - phi_i_plus_1 * b_i - si_i * rho_i
+        si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + iter_row] - phi_i * rho_i) / ps_i
         si_i_plus_1 = si_i
         si_i = si_i_minus_1
-        phi_i_minus_1 = mat_flat[4, iter_row] / ps_i
+        phi_i_minus_1 = mat_flat[mat_row_base_idx_4 + iter_row] / ps_i
         phi_i_plus_1 = phi_i
         phi_i = phi_i_minus_1
 
-        mat_factorized[iter_row, 0] = phi_i
-        mat_factorized[iter_row, 1] = si_i
-        mat_factorized[iter_row, 2] = ps_i
-        mat_factorized[iter_row, 3] = rho_i
-        mat_factorized[iter_row, 4] = b_i
+        fact_curr_base_idx -= MAT_FACT_N_COLS
+        mat_factorized[fact_curr_base_idx + 4] = b_i
+        mat_factorized[fact_curr_base_idx + 3] = rho_i
+        mat_factorized[fact_curr_base_idx + 2] = ps_i
+        mat_factorized[fact_curr_base_idx + 1] = si_i
+        mat_factorized[fact_curr_base_idx] = phi_i
 
     # Second to last row
-    b_i = mat_flat[0, 1]
-    rho_i = mat_flat[1, 1] - si_i_plus_1 * b_i
-    ps_i = mat_flat[2, 1] - phi_i_plus_1 * b_i - si_i * rho_i
-    si_i_minus_1 = (mat_flat[3, 1] - phi_i * rho_i) / ps_i
+    b_i = mat_flat[1]
+    rho_i = mat_flat[mat_row_base_idx_1 + 1] - si_i_plus_1 * b_i
+    ps_i = mat_flat[mat_row_base_idx_2 + 1] - phi_i_plus_1 * b_i - si_i * rho_i
+    si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + 1] - phi_i * rho_i) / ps_i
     si_i_plus_1 = si_i
     si_i = si_i_minus_1
 
-    mat_factorized[1, 0] = 0.0
-    mat_factorized[1, 1] = si_i
-    mat_factorized[1, 2] = ps_i
-    mat_factorized[1, 3] = rho_i
-    mat_factorized[1, 4] = b_i
+    mat_factorized[9] = b_i
+    mat_factorized[8] = rho_i
+    mat_factorized[7] = ps_i
+    mat_factorized[6] = si_i
+    mat_factorized[5] = 0.0
 
     # Last row
-    b_i = mat_flat[0, 0]
-    rho_i = mat_flat[1, 0] - si_i_plus_1 * b_i
-    ps_i = mat_flat[2, 0] - phi_i * b_i - si_i * rho_i
+    b_i = mat_flat[0]
+    rho_i = mat_flat[mat_row_base_idx_1 + 0] - si_i_plus_1 * b_i
+    ps_i = mat_flat[mat_row_base_idx_2 + 0] - phi_i * b_i - si_i * rho_i
 
-    mat_factorized[0, 0] = 0.0
-    mat_factorized[0, 1] = 0.0
-    mat_factorized[0, 2] = ps_i
-    mat_factorized[0, 3] = rho_i
-    mat_factorized[0, 4] = b_i
+    mat_factorized[4] = b_i
+    mat_factorized[3] = rho_i
+    mat_factorized[2] = ps_i
+    mat_factorized[1] = 0.0
+    mat_factorized[0] = 0.0
 
     return
 
 
-cdef void c_solve_penta_from_factorize_algo_2(
-    int64_t mat_n_rows,
-    double[::, ::1] mat_factorized,
-    double[::] rhs_single,
-    double[::] result_view,
-):
+cdef int c_solve_penta_from_factorize_algo_2(
+    int64_t mat_n_cols,
+    double* mat_factorized,
+    double* rhs_single,
+    int64_t rhs_n_cols,
+    double* result_view,
+) except * nogil:
+
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the factorized
     unit lower triangular matrix ``L`` and the right-hand side ``b``.
@@ -425,66 +495,81 @@ cdef void c_solve_penta_from_factorize_algo_2(
 
     # === Variable declarations ===
 
-    cdef int64_t iter_row
+    cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
     cdef double om_i, om_i_minus_1, om_i_plus_1  # omega
 
     # === Transformation ===
 
     # first, the right-hand side is transformed into the vector ``omega``
     # First row
-    om_i_plus_1 = rhs_single[mat_n_rows-1] / mat_factorized[mat_n_rows - 1, 2]
-    result_view[mat_n_rows-1] = om_i_plus_1
+    fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS
+    res_curr_base_idx = (mat_n_cols - 1) * rhs_n_cols
+
+    om_i_plus_1 = rhs_single[res_curr_base_idx] / mat_factorized[fact_curr_base_idx + 2]
+    result_view[res_curr_base_idx] = om_i_plus_1
 
     # Second row
+    fact_curr_base_idx -= MAT_FACT_N_COLS
+    res_curr_base_idx -= rhs_n_cols
+
     om_i = (
-        rhs_single[mat_n_rows-2]
-        - om_i_plus_1 * mat_factorized[mat_n_rows - 2, 3]
-    ) / mat_factorized[mat_n_rows - 2, 2]
-    result_view[mat_n_rows-2] = om_i
+        rhs_single[res_curr_base_idx]
+        - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 3]
+    ) / mat_factorized[fact_curr_base_idx + 2]
+    result_view[res_curr_base_idx] = om_i
 
     # Central rows
-    for iter_row in range(mat_n_rows-3, 1, -1):
+    for iter_row in range(mat_n_cols - 3, 1, -1):
+        fact_curr_base_idx -= MAT_FACT_N_COLS
+        res_curr_base_idx -= rhs_n_cols
+
         om_i_minus_1 = (
-            rhs_single[iter_row]
-            - om_i_plus_1 * mat_factorized[iter_row, 4]
-            - om_i * mat_factorized[iter_row, 3]
-        ) / mat_factorized[iter_row, 2]
+            rhs_single[res_curr_base_idx]
+            - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4]
+            - om_i * mat_factorized[fact_curr_base_idx + 3]
+        ) / mat_factorized[fact_curr_base_idx + 2]
         om_i_plus_1 = om_i
         om_i = om_i_minus_1
-        result_view[iter_row] = om_i
+        result_view[res_curr_base_idx] = om_i
 
     # Second to last row
+    fact_curr_base_idx -= MAT_FACT_N_COLS
+    res_curr_base_idx -= rhs_n_cols
+
     om_i_minus_1 = (
-        rhs_single[1]
-        - om_i_plus_1 * mat_factorized[1, 4]
-        - om_i * mat_factorized[1, 3]
-    ) / mat_factorized[1, 2]
+        rhs_single[res_curr_base_idx]
+        - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4]
+        - om_i * mat_factorized[fact_curr_base_idx + 3]
+    ) / mat_factorized[fact_curr_base_idx + 2]
     om_i_plus_1 = om_i
     om_i = om_i_minus_1
-    result_view[1] = om_i
+    result_view[res_curr_base_idx] = om_i
 
     # Last row
     om_i_minus_1 = (
         rhs_single[0]
-        - om_i_plus_1 * mat_factorized[0, 4]
-        - om_i * mat_factorized[0, 3]
-    ) / mat_factorized[0, 2]
+        - om_i_plus_1 * mat_factorized[4]
+        - om_i * mat_factorized[3]
+    ) / mat_factorized[2]
     result_view[0] = om_i_minus_1
 
     # === Forward substitution ===
 
     # The solution vector is calculated by forward substitution that overwrites the
     # right-hand side vector with the solution vector
-    om_i -= mat_factorized[1, 1] * om_i_minus_1
-    result_view[1] = om_i
-
-    for iter_row in range(2, mat_n_rows):
-        result_view[iter_row] = (
-            result_view[iter_row]
-            - mat_factorized[iter_row, 0] * om_i_minus_1
-            - mat_factorized[iter_row, 1] * om_i
+    om_i -= mat_factorized[fact_curr_base_idx + 1] * om_i_minus_1
+    result_view[res_curr_base_idx] = om_i
+
+    for iter_row in range(2, mat_n_cols):
+        fact_curr_base_idx += MAT_FACT_N_COLS
+        res_curr_base_idx += rhs_n_cols
+
+        result_view[res_curr_base_idx] = (
+            result_view[res_curr_base_idx]
+            - mat_factorized[fact_curr_base_idx] * om_i_minus_1
+            - mat_factorized[fact_curr_base_idx + 1] * om_i
         )
         om_i_minus_1 = om_i
-        om_i = result_view[iter_row]
+        om_i = result_view[res_curr_base_idx]
 
-    return
+    return 0

From 0cf9890e507a3ac887b6ed8a68ab737c9d1ed1b9 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 16:30:22 +0200
Subject: [PATCH 36/62] tests: [11] renamed tests for serial mode

---
 ...st_solvers_internal.py => test_solvers_internal_serial.py} | 4 ++++
 1 file changed, 4 insertions(+)
 rename tests/{test_solvers_internal.py => test_solvers_internal_serial.py} (98%)

diff --git a/tests/test_solvers_internal.py b/tests/test_solvers_internal_serial.py
similarity index 98%
rename from tests/test_solvers_internal.py
rename to tests/test_solvers_internal_serial.py
index cdc55fa..8b75964 100644
--- a/tests/test_solvers_internal.py
+++ b/tests/test_solvers_internal_serial.py
@@ -2,6 +2,8 @@
 Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or
 PTRANS-II.
 
+It tests them in SERIAL mode only.
+
 """
 
 # === Imports ===
@@ -132,6 +134,7 @@ def test_pentapy_solvers(
                 mat=mat,
                 rhs=rhs,
                 solver=solver_alias,  # type: ignore
+                workers=1,
                 **kwargs,
             )
             assert sol.shape == result_shape
@@ -144,6 +147,7 @@ def test_pentapy_solvers(
         mat=mat,
         rhs=rhs,
         solver=solver_alias,  # type: ignore
+        workers=1,
         **kwargs,
     )
     assert sol.shape == result_shape

From 9040bb7a4ff5e5f26f55e394f316cfa001c3a751 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 18:28:33 +0200
Subject: [PATCH 37/62] test: [11] reduced load on tests; transitioned to
 template-based approach; added tests for parallel solvers

---
 tests/templates.py                      | 176 ++++++++++++++++++++++++
 tests/test_solvers_external.py          |   4 +-
 tests/test_solvers_internal_parallel.py |  59 ++++++++
 tests/test_solvers_internal_serial.py   | 161 ++++------------------
 4 files changed, 264 insertions(+), 136 deletions(-)
 create mode 100644 tests/templates.py
 create mode 100644 tests/test_solvers_internal_parallel.py

diff --git a/tests/templates.py b/tests/templates.py
new file mode 100644
index 0000000..fe5b316
--- /dev/null
+++ b/tests/templates.py
@@ -0,0 +1,176 @@
+"""
+This test suite implements reusable templates for testing the pentadiagonal solver based
+on either Algorithm PTRANS-I or PTRANS-II.
+"""
+
+# === Imports ===
+
+from typing import Literal
+
+import numpy as np
+import pytest
+import util_funcs as uf
+
+import pentapy as pp
+
+# === Constants ===
+
+SEED = 19_031_977
+REF_WARNING_CONTENT = "not suitable for input-matrix."
+N_ROWS = [
+    3,  # important edge case
+    4,  # important edge case
+    5,  # important edge case
+    10,  # even
+    11,  # odd
+    50,  # even
+    51,  # odd
+    100,  # ...
+    101,
+    500,
+    501,
+    1_000,
+    1_001,
+    5_000,
+    5_001,
+]
+SOLVER_ALIASES_PTRANS_I = [1, "1", "pTrAnS-I"]
+SOLVER_ALIASES_PTRANS_II = [2, "2", "pTrAnS-Ii"]
+
+PARAM_DICT = {
+    "n_rows": N_ROWS,
+    "n_rhs": [None, 1, 10],
+    "input_layout": ["full", "banded_row_wise", "banded_col_wise"],
+    "solver_alias": SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II,
+    "induce_error": [False, True],
+    "from_order": ["C", "F"],
+    "workers": [1],
+}
+
+# === Templates ===
+
+
+def pentapy_solvers_template(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
+    induce_error: bool,
+    from_order: Literal["C", "F"],
+    workers: int,
+) -> None:
+    """
+    Tests the pentadiagonal solvers based on Algorithm PTRANS-I or PTRANS-II for
+    different input layouts, numbers of right-hand sides, and numbers of rows, and
+    also when inducing an error by zeroing the first or last diagonal element.
+    It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
+
+    """
+
+    # first, a random pentadiagonal matrix is generated
+    mat_full = uf.gen_conditioned_rand_penta_matrix_dense(
+        n_rows=n_rows,
+        seed=SEED,
+        ill_conditioned=False,
+    )
+
+    # an error is induced by setting the first or last diagonal element to zero
+    if induce_error:
+        # the induction of the error is only possible if the matrix does not have
+        # only 3 rows
+        if n_rows == 3:
+            pytest.skip(
+                "Only 3 rows, cannot induce error because this will not go into "
+                "PTRANS-I, but NumPy."
+            )
+
+        if solver_alias in SOLVER_ALIASES_PTRANS_I:
+            mat_full[0, 0] = 0.0
+        else:
+            mat_full[n_rows - 1, n_rows - 1] = 0.0
+
+    # the right-hand side is generated
+    np.random.seed(SEED)
+    if n_rhs is not None:
+        rhs = np.random.rand(n_rows, n_rhs)
+        result_shape = (n_rows, n_rhs)
+    else:
+        rhs = np.random.rand(n_rows)
+        result_shape = (n_rows,)
+
+    # the matrix is converted to the desired layout
+    if input_layout == "full":
+        mat = mat_full
+        kwargs = dict(is_flat=False)
+
+    elif input_layout == "banded_row_wise":
+        mat = pp.create_banded(mat_full, col_wise=False)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=True,
+        )
+
+    elif input_layout == "banded_col_wise":
+        mat = pp.create_banded(mat_full, col_wise=True)
+        kwargs = dict(
+            is_flat=True,
+            index_row_wise=False,
+        )
+
+    else:
+        raise ValueError(f"Invalid input layout: {input_layout}")
+
+    # the matrix and the right-hand side are converted to the desired memory order
+    if from_order == "C":
+        mat = np.ascontiguousarray(mat)
+        rhs = np.ascontiguousarray(rhs)
+    elif from_order == "F":
+        mat = np.asfortranarray(mat)
+        rhs = np.asfortranarray(rhs)
+    else:
+        raise ValueError(f"Invalid from order: {from_order=}")
+
+    # the solution is computed
+    # Case 1: in case of an error, a warning has to be issued and the result has to
+    # be NaN
+    if induce_error:
+        with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
+            sol = pp.solve(
+                mat=mat,
+                rhs=rhs,
+                solver=solver_alias,  # type: ignore
+                workers=workers,
+                **kwargs,
+            )
+            assert sol.shape == result_shape
+            assert np.isnan(sol).all()
+
+        return
+
+    # Case 2: in case of no error, the solution can be computed without any issues
+    sol = pp.solve(
+        mat=mat,
+        rhs=rhs,
+        solver=solver_alias,  # type: ignore
+        workers=workers,
+        **kwargs,
+    )
+    assert sol.shape == result_shape
+
+    # if no error was induced, the reference solution is computed with SciPy
+    sol_ref = uf.solve_penta_matrix_dense_scipy(
+        mat=mat_full,
+        rhs=rhs,
+    )
+
+    # the solutions are compared
+    assert np.allclose(sol, sol_ref)
diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py
index 075dfb4..30c7b9a 100644
--- a/tests/test_solvers_external.py
+++ b/tests/test_solvers_external.py
@@ -29,8 +29,8 @@
     51,
 ]
 REF_WARNING_CONTENT = "singular"
-SOLVER_ALIASES_LAPACK = [3, "3", "lapack", "LaPaCk"]
-SOLVER_ALIASES_SPSOLVE = [4, "4", "spsolve", "SpSoLvE"]
+SOLVER_ALIASES_LAPACK = [3, "3", "LaPaCk"]
+SOLVER_ALIASES_SPSOLVE = [4, "4", "SpSoLvE"]
 
 # === Tests ===
 
diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py
new file mode 100644
index 0000000..b8595b1
--- /dev/null
+++ b/tests/test_solvers_internal_parallel.py
@@ -0,0 +1,59 @@
+"""
+Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or
+PTRANS-II.
+
+It tests them in PARALLEL mode.
+
+"""
+
+# === Imports ===
+
+from copy import deepcopy
+from typing import Literal
+
+import pytest
+import templates
+
+# === Tests ===
+
+# the following series of decorators parametrize the tests for the pentadiagonal solver
+# based on either Algorithm PTRANS-I or PTRANS-II in parallel mode
+param_dict = deepcopy(templates.PARAM_DICT)
+param_dict["from_order"] = ["C"]
+param_dict["workers"] = [-1]
+
+
+def test_pentapy_solvers_parallel(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
+    induce_error: bool,
+    from_order: Literal["C", "F"],
+    workers: int,
+) -> None:
+
+    templates.pentapy_solvers_template(
+        n_rows=n_rows,
+        n_rhs=n_rhs,
+        input_layout=input_layout,
+        solver_alias=solver_alias,
+        induce_error=induce_error,
+        from_order=from_order,
+        workers=workers,
+    )
+
+
+for key, value in param_dict.items():
+    test_pentapy_solvers_parallel = pytest.mark.parametrize(key, value)(
+        test_pentapy_solvers_parallel
+    )
diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py
index 8b75964..6bed962 100644
--- a/tests/test_solvers_internal_serial.py
+++ b/tests/test_solvers_internal_serial.py
@@ -10,153 +10,46 @@
 
 from typing import Literal
 
-import numpy as np
-import pentapy as pp
 import pytest
-import util_funcs as uf
-
-# === Constants ===
-
-SEED = 19_031_977
-N_ROWS = [
-    3,
-    4,
-    5,
-    10,
-    11,
-    25,
-    26,
-    50,
-    51,
-    100,
-    101,
-    250,
-    251,
-    500,
-    501,
-    1_000,
-    1_001,
-    2500,
-    2501,
-    5_000,
-    5_001,
-    10_000,
-    10_001,
-]
-REF_WARNING_CONTENT = "not suitable for input-matrix."
-SOLVER_ALIASES_PTRANS_I = [1, "1", "PTRANS-I", "ptrans-i"]
-SOLVER_ALIASES_PTRANS_II = [2, "2", "PTRANS-II", "ptrans-ii"]
+import templates
 
 # === Tests ===
 
+# the following series of decorators parametrize the tests for the pentadiagonal solver
+# based on either Algorithm PTRANS-I or PTRANS-II in serial mode
 
-@pytest.mark.parametrize("induce_error", [False, True])
-@pytest.mark.parametrize(
-    "solver_alias", SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II
-)
-@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"])
-@pytest.mark.parametrize("n_rhs", [None, 1, 10])
-@pytest.mark.parametrize("n_rows", N_ROWS)
-def test_pentapy_solvers(
+
+def test_pentapy_solvers_serial(
     n_rows: int,
     n_rhs: int,
     input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
-    solver_alias: Literal[1, "1", "PTRANS-I"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
     induce_error: bool,
+    from_order: Literal["C", "F"],
+    workers: int,
 ) -> None:
-    """
-    Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from
-    different input layouts, number of right-hand sides, number of rows, and also
-    when inducing an error by making the first diagonal element zero.
-    It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
-
-    """
 
-    # first, a random pentadiagonal matrix is generated
-    mat_full = uf.gen_conditioned_rand_penta_matrix_dense(
+    templates.pentapy_solvers_template(
         n_rows=n_rows,
-        seed=SEED,
-        ill_conditioned=False,
+        n_rhs=n_rhs,
+        input_layout=input_layout,
+        solver_alias=solver_alias,
+        induce_error=induce_error,
+        from_order=from_order,
+        workers=workers,
     )
 
-    # an error is induced by setting the first or last diagonal element to zero
-    if induce_error:
-        # the induction of the error is only possible if the matrix does not have
-        # only 3 rows
-        if n_rows == 3:
-            pytest.skip(
-                "Only 3 rows, cannot induce error because this will not go into "
-                "PTRANS-I, but NumPy."
-            )
-
-        if solver_alias in SOLVER_ALIASES_PTRANS_I:
-            mat_full[0, 0] = 0.0
-        else:
-            mat_full[n_rows - 1, n_rows - 1] = 0.0
-
-    # the right-hand side is generated
-    np.random.seed(SEED)
-    if n_rhs is not None:
-        rhs = np.random.rand(n_rows, n_rhs)
-        result_shape = (n_rows, n_rhs)
-    else:
-        rhs = np.random.rand(n_rows)
-        result_shape = (n_rows,)
-
-    # the matrix is converted to the desired layout
-    if input_layout == "full":
-        mat = mat_full
-        kwargs = dict(is_flat=False)
-
-    elif input_layout == "banded_row_wise":
-        mat = pp.create_banded(mat_full, col_wise=False)
-        kwargs = dict(
-            is_flat=True,
-            index_row_wise=True,
-        )
 
-    elif input_layout == "banded_col_wise":
-        mat = pp.create_banded(mat_full, col_wise=True)
-        kwargs = dict(
-            is_flat=True,
-            index_row_wise=False,
-        )
-
-    else:
-        raise ValueError(f"Invalid input layout: {input_layout}")
-
-    # the solution is computed
-    # Case 1: in case of an error, a warning has to be issued and the result has to
-    # be NaN
-    if induce_error:
-        with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
-            sol = pp.solve(
-                mat=mat,
-                rhs=rhs,
-                solver=solver_alias,  # type: ignore
-                workers=1,
-                **kwargs,
-            )
-            assert sol.shape == result_shape
-            assert np.isnan(sol).all()
-
-        return
-
-    # Case 2: in case of no error, the solution can be computed without any issues
-    sol = pp.solve(
-        mat=mat,
-        rhs=rhs,
-        solver=solver_alias,  # type: ignore
-        workers=1,
-        **kwargs,
+for key, value in templates.PARAM_DICT.items():
+    test_pentapy_solvers_serial = pytest.mark.parametrize(key, value)(
+        test_pentapy_solvers_serial
     )
-    assert sol.shape == result_shape
-
-    # if no error was induced, the reference solution is computed with SciPy
-    sol_ref = uf.solve_penta_matrix_dense_scipy(
-        mat=mat_full,
-        rhs=rhs,
-    )
-
-    # the solutions are compared
-    assert np.allclose(sol, sol_ref)

From a0b33883290d1580ab12a8d5e019cb6753ab7242 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 18:31:31 +0200
Subject: [PATCH 38/62] doc/style: [11] fully typed `tools`; improved
 documentation

---
 src/pentapy/tools.py | 154 ++++++++++++++++++++++++++++++-------------
 1 file changed, 107 insertions(+), 47 deletions(-)

diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py
index 4bb0b52..0e446b5 100644
--- a/src/pentapy/tools.py
+++ b/src/pentapy/tools.py
@@ -14,12 +14,21 @@
    create_full
 """
 
-# pylint: disable=C0103
+# === Imports ===
+
+from typing import Optional, Tuple, Type
+
 import numpy as np
 
+# === Functions ===
 
-def diag_indices(n, offset=0):
+
+def diag_indices(
+    n: int,
+    offset: int = 0,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
+
     Get indices for the main or minor diagonals of a matrix.
 
     This returns a tuple of indices that can be used to access the main
@@ -28,17 +37,17 @@ def diag_indices(n, offset=0):
 
     Parameters
     ----------
-    n : int
+    n : :class:`int`
       The size, along each dimension, of the arrays for which the returned
       indices can be used.
-    offset : int, optional
-      The diagonal offset.
+    offset : :class:`int`, default=0
+      The diagonal offset. Default: 0
 
     Returns
     -------
-    idx : :class:`numpy.ndarray`
+    idx : :class:`numpy.ndarray` of shape (n - abs(offset),)
         row indices
-    idy : :class:`numpy.ndarray`
+    idy : :class:`numpy.ndarray` of shape (n - abs(offset),)
         col indices
 
     """
@@ -47,8 +56,15 @@ def diag_indices(n, offset=0):
     return idx, idy
 
 
-def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
-    """Shift rows of a banded matrix.
+def shift_banded(
+    mat: np.ndarray,
+    up: int = 2,
+    low: int = 2,
+    col_to_row: bool = True,
+    copy: bool = True,
+) -> np.ndarray:
+    """
+    Shift rows of a banded matrix.
 
     Either from column-wise to row-wise storage or vice versa.
 
@@ -83,27 +99,33 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
 
     Parameters
     ----------
-    mat : :class:`numpy.ndarray`
+    mat : :class:`numpy.ndarray` of shape (5, n)
         The Matrix or the flattened Version of the pentadiagonal matrix.
-    up : :class:`int`
+    up : :class:`int`, default=2
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`
+    low : :class:`int`, default=2
         The number of lower minor-diagonals. Default: 2
-    col_to_row : :class:`bool`, optional
+    col_to_row : :class:`bool`, default=``True``
         Shift from column-wise to row-wise storage or vice versa.
         Default: ``True``
-    copy : :class:`bool`, optional
+    copy : :class:`bool`, default=``True``
         Copy the input matrix or overwrite it. Default: ``True``
 
     Returns
     -------
-    :class:`numpy.ndarray`
+    :class:`numpy.ndarray` of shape (5, n)
         Shifted banded matrix
+
     """
+
+    # first, the matrix is copied if required
     if copy:
         mat_flat = np.copy(mat)
     else:
         mat_flat = mat
+
+    # then, the shifting is performed
+    # Case 1: Column-wise to row-wise
     if col_to_row:
         for i in range(up):
             mat_flat[i, : -(up - i)] = mat_flat[i, (up - i) :]
@@ -111,18 +133,29 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True):
         for i in range(low):
             mat_flat[-i - 1, (low - i) :] = mat_flat[-i - 1, : -(low - i)]
             mat_flat[-i - 1, : (low - i)] = 0
-    else:
-        for i in range(up):
-            mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)]
-            mat_flat[i, : (up - i)] = 0
-        for i in range(low):
-            mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :]
-            mat_flat[-i - 1, -(low - i) :] = 0
+
+        return mat_flat
+
+    # Case 2: Row-wise to column-wise
+    for i in range(up):
+        mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)]
+        mat_flat[i, : (up - i)] = 0
+    for i in range(low):
+        mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :]
+        mat_flat[-i - 1, -(low - i) :] = 0
+
     return mat_flat
 
 
-def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
-    """Create a banded matrix from a given quadratic Matrix.
+def create_banded(
+    mat: np.ndarray,
+    up: int = 2,
+    low: int = 2,
+    col_wise: bool = True,
+    dtype: Optional[Type] = None,
+) -> np.ndarray:
+    """
+    Create a banded matrix from a given square Matrix.
 
     The Matrix will to be returned as a flattened matrix.
     Either in a column-wise flattened form::
@@ -155,21 +188,27 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
 
     Parameters
     ----------
-    mat : :class:`numpy.ndarray`
+    mat : :class:`numpy.ndarray` of shape (n, n)
         The full (n x n) Matrix.
-    up : :class:`int`
+    up : :class:`int`, default=2
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`
+    low : :class:`int`, default=2
         The number of lower minor-diagonals. Default: 2
-    col_wise : :class:`bool`, optional
+    col_wise : :class:`bool`, default=``True``
         Use column-wise storage. If False, use row-wise storage.
         Default: ``True``
+    dtype : :class:`type` or ``None``, default=``None``
+        The data type of the returned matrix. If ``None``, the data type of the
+        input matrix is preserved. Default: ``None``
 
     Returns
     -------
-    :class:`numpy.ndarray`
+    :class:`numpy.ndarray` of shape (5, n)
         Banded matrix
+
     """
+
+    # first, the matrix is checked
     mat = np.asanyarray(mat)
     if mat.ndim != 2:
         msg = f"create_banded: matrix has to be 2D, got {mat.ndim}D"
@@ -182,24 +221,36 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None):
         )
         raise ValueError(msg)
 
+    # then, the matrix is created
+    dtype = mat.dtype if dtype is None else dtype
     size = mat.shape[0]
-    mat_flat = np.zeros((5, size), dtype=dtype)
+    mat_flat = np.zeros(shape=(5, size), dtype=dtype)
     mat_flat[up, :] = mat.diagonal()
 
+    # Case 1: Column-wise storage
     if col_wise:
         for i in range(up):
             mat_flat[i, (up - i) :] = mat.diagonal(up - i)
         for i in range(low):
             mat_flat[-i - 1, : -(low - i)] = mat.diagonal(-(low - i))
-    else:
-        for i in range(up):
-            mat_flat[i, : -(up - i)] = mat.diagonal(up - i)
-        for i in range(low):
-            mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i))
+
+        return mat_flat
+
+    # Case 2: Row-wise storage
+    for i in range(up):
+        mat_flat[i, : -(up - i)] = mat.diagonal(up - i)
+    for i in range(low):
+        mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i))
+
     return mat_flat
 
 
-def create_full(mat, up=2, low=2, col_wise=True):
+def create_full(
+    mat: np.ndarray,
+    up: int = 2,
+    low: int = 2,
+    col_wise: bool = True,
+) -> np.ndarray:
     """Create a (n x n) Matrix from a given banded matrix.
 
     The given Matrix has to be a flattened matrix.
@@ -233,21 +284,24 @@ def create_full(mat, up=2, low=2, col_wise=True):
 
     Parameters
     ----------
-    mat : :class:`numpy.ndarray`
+    mat : :class:`numpy.ndarray` of shape (5, n)
         The flattened Matrix.
-    up : :class:`int`
+    up : :class:`int`, default=2
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`
+    low : :class:`int`, default=2
         The number of lower minor-diagonals. Default: 2
-    col_wise : :class:`bool`, optional
+    col_wise : :class:`bool`, default=``True``
         Input is in column-wise storage. If False, use as row-wise storage.
         Default: ``True``
 
     Returns
     -------
-    :class:`numpy.ndarray`
+    :class:`numpy.ndarray` of shape (n, n)
         Full matrix.
+
     """
+
+    # first, the matrix is checked
     mat = np.asanyarray(mat)
     if mat.ndim != 2:
         msg = f"create_full: matrix has to be 2D, got {mat.ndim}D"
@@ -267,23 +321,29 @@ def create_full(mat, up=2, low=2, col_wise=True):
         )
         raise ValueError(msg)
 
+    # then, the matrix is created
     size = mat.shape[1]
     mat_full = np.diag(mat[up])
+
+    # Case 1: Column-wise storage
     if col_wise:
         for i in range(up):
             mat_full[diag_indices(size, up - i)] = mat[i, (up - i) :]
         for i in range(low):
             mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, : -(low - i)]
-    else:
-        for i in range(up):
-            mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)]
-        for i in range(low):
-            mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :]
+
+        return mat_full
+
+    # Case 2: Row-wise storage
+    for i in range(up):
+        mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)]
+    for i in range(low):
+        mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :]
 
     return mat_full
 
 
-def _check_penta(mat):
+def _check_penta(mat: np.ndarray) -> None:
     if mat.ndim != 2:
         msg = f"pentapy: matrix has to be 2D, got {mat.ndim}D"
         raise ValueError(msg)

From b93102baba32f5d1d14282ac8f7dddf76cf3572d Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 18:32:03 +0200
Subject: [PATCH 39/62] package: [11] included build information for
 parallelized solvers

---
 setup.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/setup.py b/setup.py
index fc8648c..8d7421d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,18 @@
 """pentapy: A toolbox for pentadiagonal matrices."""
 
 import os
+import sys
 
 import Cython.Compiler.Options
 import numpy as np
 from Cython.Build import cythonize
 from setuptools import Extension, setup
 
+if sys.platform.startswith("win"):
+    openmp_arg = "/openmp"
+else:
+    openmp_arg = "-fopenmp"
+
 Cython.Compiler.Options.annotate = True
 
 # cython extensions
@@ -16,6 +22,8 @@
         sources=[os.path.join("src", "pentapy", "solver.pyx")],
         include_dirs=[np.get_include()],
         define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
+        extra_compile_args=[openmp_arg],
+        extra_link_args=[openmp_arg],
     )
 ]
 

From 3b015dc7615756b03796f791b79fbfefbe0f816b Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 18:55:37 +0200
Subject: [PATCH 40/62] tests: [11] finalised parallel solver tests

---
 tests/test_solvers_internal_parallel.py | 34 ++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py
index b8595b1..d091d8d 100644
--- a/tests/test_solvers_internal_parallel.py
+++ b/tests/test_solvers_internal_parallel.py
@@ -9,7 +9,7 @@
 # === Imports ===
 
 from copy import deepcopy
-from typing import Literal
+from typing import Literal, Optional, Type
 
 import pytest
 import templates
@@ -57,3 +57,35 @@ def test_pentapy_solvers_parallel(
     test_pentapy_solvers_parallel = pytest.mark.parametrize(key, value)(
         test_pentapy_solvers_parallel
     )
+
+
+@pytest.mark.parametrize(
+    "workers, expected", [(0, None), (1, None), (-1, None), (-2, ValueError)]
+)
+def test_pentapy_solvers_parallel_different_workers(
+    workers: int, expected: Optional[Type[Exception]]
+) -> None:
+    """
+    Tests the solver with different numbers of workers, some of which are invalid.
+
+    """
+
+    kwargs = dict(
+        n_rows=10,
+        n_rhs=1,
+        input_layout="full",
+        solver_alias=1,
+        induce_error=False,
+        from_order="C",
+        workers=workers,
+    )
+
+    # Case 1: the call is expected to raise the given exception
+    if expected is not None:
+        with pytest.raises(expected):
+            templates.pentapy_solvers_template(**kwargs)  # type: ignore
+
+        return
+
+    # Case 2: the test should pass
+    templates.pentapy_solvers_template(**kwargs)  # type: ignore

From 5e86a349e011bd19ce55443a0dd55bfd4b69a2b0 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 19:15:21 +0200
Subject: [PATCH 41/62] tests/fix: [11] fixed inter-os-incompatibility of
 doctests

---
 tests/util_funcs.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index be9f3c1..f9d9074 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -196,6 +196,9 @@ def gen_conditioned_rand_penta_matrix_dense(
 
     Doctests
     --------
+    >>> # Imports
+    >>> from math import log10
+
     >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix
     >>> n_rows = 3
     >>> seed = 19_031_977
@@ -215,8 +218,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
-    >>> np.linalg.cond(mat)
-    4.976880305142543
+    >>> round(np.linalg.cond(mat), 2)
+    4.98
 
     >>> # 2) Generating a super small ill-conditioned random pentadiagonal matrix
     >>> mat = gen_conditioned_rand_penta_matrix_dense(
@@ -235,8 +238,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
-    >>> np.linalg.cond(mat)
-    1.493156437173682e+17
+    >>> round(log10(np.linalg.cond(mat)), 2)
+    17.17
 
     >>> # 3) Generating a small well-conditioned random pentadiagonal matrix
     >>> n_rows = 7
@@ -260,8 +263,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
-    >>> np.linalg.cond(mat)
-    42.4847446467131
+    >>> round(np.linalg.cond(mat), 2)
+    42.48
 
     >>> # 4) Generating a small ill-conditioned random pentadiagonal matrix
     >>> mat = gen_conditioned_rand_penta_matrix_dense(
@@ -284,8 +287,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
-    >>> np.linalg.cond(mat)
-    1.1079218802103074e+17
+    >>> round(log10(np.linalg.cond(mat)), 2)
+    17.04
 
     >>> # 5) Generating a large well-conditioned random pentadiagonal matrix
     >>> n_rows = 1_000
@@ -301,8 +304,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     >>> spla.bandwidth(mat)
     (2, 2)
     >>> # its condition number is computed and values below 1e10 can be considered good
-    >>> np.linalg.cond(mat)
-    9570.995402466417
+    >>> round(np.linalg.cond(mat), 2)
+    9571.0
 
     >>> # 6) Generating a large ill-conditioned random pentadiagonal matrix
     >>> mat = gen_conditioned_rand_penta_matrix_dense(
@@ -317,8 +320,8 @@ def gen_conditioned_rand_penta_matrix_dense(
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
-    >>> np.linalg.cond(mat)
-    1.7137059583101745e+19
+    >>> round(log10(np.linalg.cond(mat)), 2)
+    19.23
 
     """
 

From 712b0c78d049e3810f6424ed7f2884c739f0e577 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 19:22:53 +0200
Subject: [PATCH 42/62] test/fix: [11] ? really fixed the inter-os-problems ?

---
 tests/util_funcs.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/util_funcs.py b/tests/util_funcs.py
index f9d9074..0ea1582 100644
--- a/tests/util_funcs.py
+++ b/tests/util_funcs.py
@@ -320,8 +320,10 @@ def gen_conditioned_rand_penta_matrix_dense(
     (2, 2)
     >>> # its condition number is computed and its value should be close to the
     >>> # reciprocal floating point precision, i.e., ~1e16
-    >>> round(log10(np.linalg.cond(mat)), 2)
-    19.23
+    >>> # NOTE: the next number is so large that it differs between operating systems,
+    >>> #       so it is only checked that it is at least 1e16
+    >>> round(log10(np.linalg.cond(mat)), 2) >= 16
+    True
 
     """
 

From 6fd57d4d2840cd2f5b5642f9576d351c570b34cf Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 19:37:59 +0200
Subject: [PATCH 43/62] feat: [11] updated changelog

---
 CHANGELOG.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d13b435..5891ee6 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
 
 All notable changes to **pentapy** will be documented in this file.
 
+## [2.0.0] - 2024-06
+
+See [#27](https://github.com/GeoStat-Framework/pentapy/pull/27)
+
+### Breaking Changes
+
+- fully parallelized the Cython implementations of PTRANS-I and PTRANS-II with support for single and multiple right-hand sides; the parallelization can now be enabled via the new ``workers`` parameter in ``pentapy.solve`` (default: 1)
+- fully typed the ``pentapy.tools`` module
+- updated the **Cython low level interfaces** to PTRANS-I and PTRANS-II to **only accept C-contiguous arrays** (not backwards compatible)
+
 ## [1.4.0] - 2024-06
 
 See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26)
@@ -133,6 +143,7 @@ This is the first release of pentapy, a python toolbox for solving pentadiagonal
 The solver is implemented in cython, which makes it really fast.
 
 
+[2.0.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.4.0...v2.0.0
 [1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0
 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0
 [1.2.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.2...v1.2.0

From 88e30e6efd697af0ee4e8b93db22a909ed335f98 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 21:08:34 +0200
Subject: [PATCH 44/62] fix/doc: [11] reverted change that caused overwrite of
 `mat`; added a clarifying comment; augmented tests to cover this error

---
 src/pentapy/core.py            | 6 +++---
 tests/templates.py             | 4 ++++
 tests/test_solvers_external.py | 7 ++++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 87761a4..0794ba9 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -119,7 +119,7 @@ def solve(
             mat_flat = np.asarray(mat, dtype=np.double)
             ptools._check_penta(mat_flat)
         elif is_flat:
-            mat_flat = np.asarray(mat, dtype=np.double)
+            mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, copy=False)
         else:
@@ -192,7 +192,7 @@ def solve(
             raise ValueError(msg) from imp_err
 
         if is_flat and index_row_wise:
-            mat_flat = np.asarray(mat)
+            mat_flat = np.array(mat)  # NOTE: this is a copy
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
@@ -227,7 +227,7 @@ def solve(
             raise ValueError(msg) from imp_err
 
         if is_flat and index_row_wise:
-            mat_flat = np.asarray(mat)
+            mat_flat = np.array(mat)  # NOTE: this is a copy
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
diff --git a/tests/templates.py b/tests/templates.py
index fe5b316..d67d6cc 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -144,6 +144,7 @@ def pentapy_solvers_template(
     # be NaN
     if induce_error:
         with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
+            mat_ref_copy = mat.copy()
             sol = pp.solve(
                 mat=mat,
                 rhs=rhs,
@@ -153,10 +154,12 @@ def pentapy_solvers_template(
             )
             assert sol.shape == result_shape
             assert np.isnan(sol).all()
+            assert np.array_equal(mat, mat_ref_copy)
 
         return
 
     # Case 2: in case of no error, the solution can be computed without any issues
+    mat_ref_copy = mat.copy()
     sol = pp.solve(
         mat=mat,
         rhs=rhs,
@@ -165,6 +168,7 @@ def pentapy_solvers_template(
         **kwargs,
     )
     assert sol.shape == result_shape
+    assert np.array_equal(mat, mat_ref_copy)
 
     # if no error was induced, the reference solution is computed with SciPy
     sol_ref = uf.solve_penta_matrix_dense_scipy(
diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py
index 30c7b9a..0f407aa 100644
--- a/tests/test_solvers_external.py
+++ b/tests/test_solvers_external.py
@@ -10,10 +10,11 @@
 from typing import Literal
 
 import numpy as np
-import pentapy as pp
 import pytest
 import util_funcs as uf
 
+import pentapy as pp
+
 # === Constants ===
 
 SEED = 19_031_977
@@ -100,6 +101,7 @@ def test_external_solvers(
     # be NaN
     if induce_error:
         with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
+            mat_ref_copy = mat.copy()
             sol = pp.solve(
                 mat=mat,
                 rhs=rhs,
@@ -108,10 +110,12 @@ def test_external_solvers(
             )
             assert sol.shape == result_shape
             assert np.isnan(sol).all()
+            assert np.array_equal(mat, mat_ref_copy)
 
         return
 
     # Case 2: in case of no error, the solution can be computed without any issues
+    mat_ref_copy = mat.copy()
     sol = pp.solve(
         mat=mat,
         rhs=rhs,
@@ -119,3 +123,4 @@ def test_external_solvers(
         **kwargs,
     )
     assert sol.shape == result_shape
+    assert np.array_equal(mat, mat_ref_copy)

From b4ce4a2a5a2adf8e0f838c6ea52d3f2a5037f926 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 21:29:09 +0200
Subject: [PATCH 45/62] docs: [11] removed doubled defaults from docstrings

---
 src/pentapy/core.py  | 10 +++++-----
 src/pentapy/tools.py | 24 ++++++++++++------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 0794ba9..af338c8 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -79,11 +79,11 @@ def solve(
     rhs : :class:`numpy.ndarray` of shape (m,) or (m, n)
         The right hand side(s) of the equation system. Its shape determines the shape
         of the output as they will be identical.
-    is_flat : :class:`bool`, default=False
+    is_flat : :class:`bool`, optional
         State if the matrix is already flattened. Default: ``False``
-    index_row_wise : :class:`bool`, default=True
+    index_row_wise : :class:`bool`, optional
         State if the flattened matrix is row-wise flattened. Default: ``True``
-    solver : :class:`int` or :class:`str`, default=1
+    solver : :class:`int` or :class:`str`, optional
         Which solver should be used. The following are provided:
 
             * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm (default)
@@ -93,7 +93,7 @@ def solve(
             * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
 
         Strings are not case-sensitive.
-    workers : :class:`int`, default=1
+    workers : :class:`int`, optional
         Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel
         processing of multiple right-hand sides. Parallelisation overhead can be
         significant for small systems. If set to ``-1``, the number of workers is
@@ -119,7 +119,7 @@ def solve(
             mat_flat = np.asarray(mat, dtype=np.double)
             ptools._check_penta(mat_flat)
         elif is_flat:
-            mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy
+            mat_flat = np.array(mat, dtype=np.double)  # NOTE: this is a copy
             ptools._check_penta(mat_flat)
             ptools.shift_banded(mat_flat, copy=False)
         else:
diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py
index 0e446b5..57d7f81 100644
--- a/src/pentapy/tools.py
+++ b/src/pentapy/tools.py
@@ -40,7 +40,7 @@ def diag_indices(
     n : :class:`int`
       The size, along each dimension, of the arrays for which the returned
       indices can be used.
-    offset : :class:`int`, default=0
+    offset : :class:`int`, optional
       The diagonal offset. Default: 0
 
     Returns
@@ -101,14 +101,14 @@ def shift_banded(
     ----------
     mat : :class:`numpy.ndarray` of shape (5, n)
         The Matrix or the flattened Version of the pentadiagonal matrix.
-    up : :class:`int`, default=2
+    up : :class:`int`, optional
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`, default=2
+    low : :class:`int`, optional
         The number of lower minor-diagonals. Default: 2
-    col_to_row : :class:`bool`, default=``True``
+    col_to_row : :class:`bool`, optional
         Shift from column-wise to row-wise storage or vice versa.
         Default: ``True``
-    copy : :class:`bool`, default=``True``
+    copy : :class:`bool`, optional
         Copy the input matrix or overwrite it. Default: ``True``
 
     Returns
@@ -190,14 +190,14 @@ def create_banded(
     ----------
     mat : :class:`numpy.ndarray` of shape (n, n)
         The full (n x n) Matrix.
-    up : :class:`int`, default=2
+    up : :class:`int`, optional
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`, default=2
+    low : :class:`int`, optional
         The number of lower minor-diagonals. Default: 2
-    col_wise : :class:`bool`, default=``True``
+    col_wise : :class:`bool`, optional
         Use column-wise storage. If False, use row-wise storage.
         Default: ``True``
-    dtype : :class:`type` or ``None``, default=``None``
+    dtype : :class:`type` or ``None``, optional
         The data type of the returned matrix. If ``None``, the data type of the
         input matrix is preserved. Default: ``None``
 
@@ -286,11 +286,11 @@ def create_full(
     ----------
     mat : :class:`numpy.ndarray` of shape (5, n)
         The flattened Matrix.
-    up : :class:`int`, default=2
+    up : :class:`int`, optional
         The number of upper minor-diagonals. Default: 2
-    low : :class:`int`, default=2
+    low : :class:`int`, optional
         The number of lower minor-diagonals. Default: 2
-    col_wise : :class:`bool`, default=``True``
+    col_wise : :class:`bool`, optional
         Input is in column-wise storage. If False, use as row-wise storage.
         Default: ``True``
 

From e71ff9d4a26709d32359a9d33d1e6363da13dbc1 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 9 Jun 2024 21:36:35 +0200
Subject: [PATCH 46/62] docs: [11] fixed docstring and spelling inconsistencies

---
 src/pentapy/core.py  | 2 +-
 src/pentapy/tools.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index af338c8..98b07cd 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -48,7 +48,7 @@ def solve(
     """
     Solver for a pentadiagonal system.
 
-    The matrix can be given as a full n x n matrix or as a flattened one.
+    The matrix can be given as a full (n x n) matrix or as a flattened one.
     The flattened matrix can be given in a row-wise flattened form::
 
       [[Dup2[0]  Dup2[1]  Dup2[2]  ... Dup2[N-2]  0          0       ]
diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py
index 57d7f81..fc5b29e 100644
--- a/src/pentapy/tools.py
+++ b/src/pentapy/tools.py
@@ -28,7 +28,6 @@ def diag_indices(
     offset: int = 0,
 ) -> Tuple[np.ndarray, np.ndarray]:
     """
-
     Get indices for the main or minor diagonals of a matrix.
 
     This returns a tuple of indices that can be used to access the main
@@ -251,7 +250,8 @@ def create_full(
     low: int = 2,
     col_wise: bool = True,
 ) -> np.ndarray:
-    """Create a (n x n) Matrix from a given banded matrix.
+    """
+    Create an (n x n) Matrix from a given banded matrix.
 
     The given Matrix has to be a flattened matrix.
     Either in a column-wise flattened form::

From aef7d0f23694e9c37895df185b2aebd01d7ef089 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Mon, 10 Jun 2024 13:33:50 +0200
Subject: [PATCH 47/62] feat: [11] enabled future possible validation of the
 quality of the solve on C-level

---
 src/pentapy/core.py    |  4 ++-
 src/pentapy/solver.pxd |  4 +++
 src/pentapy/solver.pyx | 58 ++++++++++++++++++++++++++++++------------
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 98b07cd..6ed1aaa 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -169,11 +169,13 @@ def solve(
             )
 
             # if there was only a 1D right-hand side, the result has to be flattened
-            sol = solver_func(
+            sol, info = solver_func(  # NOTE: info is for potential future validation
                 np.ascontiguousarray(mat_flat),
                 np.ascontiguousarray(rhs),
                 workers,
+                False,  # NOTE: this can enable validation in the future
             )
+
             if single_rhs:
                 sol = sol.ravel()
 
diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index 4fe6c1e..2fe2843 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -3,10 +3,14 @@ cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
+    int* info,
 )
 
 cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
+    int* info,
 )
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index fc2a9a5..53e8cd3 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -24,13 +24,23 @@ def penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
 ):
-    return np.asarray(
-        c_penta_solver1(
-            mat_flat,
-            rhs,
-            workers,
-        )
+
+    # NOTE: info is defined to be overwritten for possible future validations
+    cdef int info
+
+    return (
+        np.asarray(
+            c_penta_solver1(
+                mat_flat,
+                rhs,
+                workers,
+                validate,
+                &info,
+            )
+        ),
+        info,
     )
 
 
@@ -38,13 +48,23 @@ def penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
 ):
-    return np.asarray(
-        c_penta_solver2(
-            mat_flat,
-            rhs,
-            workers,
-        )
+
+    # NOTE: info is defined to be overwritten for possible future validations
+    cdef int info
+
+    return (
+        np.asarray(
+            c_penta_solver2(
+                mat_flat,
+                rhs,
+                workers,
+                validate,
+                &info,
+            )
+        ),
+        info,
     )
 
 
@@ -54,6 +74,8 @@ cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
+    int* info,
 ):
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
@@ -78,7 +100,7 @@ cdef double[::, ::1] c_penta_solver1(
     # === Solving the system of equations ===
 
     # first, the matrix is factorized
-    c_penta_factorize_algo1(
+    c_penta_factorize_algo_1(
         &mat_flat[0, 0],
         mat_n_cols,
         &mat_factorized[0, 0],
@@ -98,10 +120,11 @@ cdef double[::, ::1] c_penta_solver1(
             &result[0, iter_col],
         )
 
+    info[0] = 0
     return result
 
 
-cdef void c_penta_factorize_algo1(
+cdef void c_penta_factorize_algo_1(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,
@@ -312,6 +335,8 @@ cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
+    bint validate,
+    int* info,
 ):
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and
@@ -336,7 +361,7 @@ cdef double[::, ::1] c_penta_solver2(
     # === Solving the system of equations ===
 
     # first, the matrix is factorized
-    c_penta_factorize_algo2(
+    c_penta_factorize_algo_2(
         &mat_flat[0, 0],
         mat_n_cols,
         &mat_factorized[0, 0],
@@ -356,9 +381,10 @@ cdef double[::, ::1] c_penta_solver2(
             &result[0, iter_col],
         )
 
+    info[0] = 0
     return result
 
-cdef void c_penta_factorize_algo2(
+cdef void c_penta_factorize_algo_2(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,

From 86ebf887cedd016d79dc27a7975da6228c982102 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Mon, 10 Jun 2024 14:03:01 +0200
Subject: [PATCH 48/62] refactor: [11] made internal `workers`-handling safer

---
 src/pentapy/core.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 6ed1aaa..87e6160 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -147,13 +147,21 @@ def solve(
             )
 
         if workers == -1:
-            proc = psutil.Process()
-            workers = len(proc.cpu_affinity())  # type: ignore
-            del proc
+            # NOTE: the following will be overwritten by the number of available threads
+            workers = 999_999_999_999_999_999_999_999_999
 
         elif workers == 0:
             workers = 1
 
+        # the number of workers is limited to the number of available threads
+        proc = psutil.Process()
+        workers = min(
+            workers,
+            len(proc.cpu_affinity()),  # type: ignore
+        )
+        workers = max(workers, 1)
+        del proc
+
         # if there is only a single right-hand side, it has to be reshaped to a 2D array
         # NOTE: this has to be reverted at the end
         single_rhs = rhs.ndim == 1

From b5f8b95e8b830e57d034270cd5a3cf0c193c695e Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Mon, 10 Jun 2024 14:03:42 +0200
Subject: [PATCH 49/62] docs: [11] updated outdated `README`

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6e5983a..33e7237 100644
--- a/README.md
+++ b/README.md
@@ -107,7 +107,8 @@ Have a look at the script: [``examples/03_perform_simple.py``](https://github.co
 
 ## Requirements:
 
-- [NumPy >= 1.14.5](https://www.numpy.org)
+- [NumPy >= 1.20.0](https://www.numpy.org)
+- [psutil >= 5.8.0](https://psutil.readthedocs.io/en/latest/) (for parallelisation)
 
 ### Optional
 

From 87264d71f71af7a49cda27222cc4e166b96df9de Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Mon, 10 Jun 2024 18:35:04 +0200
Subject: [PATCH 50/62] refactor: [11] removed `validate` and leveraged `info`;
 reduced test load

---
 src/pentapy/core.py    | 42 ++++++++++++++-------------
 src/pentapy/solver.pxd |  2 --
 src/pentapy/solver.pyx | 64 ++++++++++++++++++++++++++++++++----------
 tests/templates.py     |  6 +---
 4 files changed, 73 insertions(+), 41 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 87e6160..f8134ac 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -169,29 +169,33 @@ def solve(
         if single_rhs:
             rhs = rhs[:, np.newaxis]
 
-        try:
-            solver_func = (
-                psolver.penta_solver1
-                if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
-                else psolver.penta_solver2
-            )
+        # the respective solver is chosen ...
+        solver_func = (
+            psolver.penta_solver1
+            if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
+            else psolver.penta_solver2
+        )
 
-            # if there was only a 1D right-hand side, the result has to be flattened
-            sol, info = solver_func(  # NOTE: info is for potential future validation
-                np.ascontiguousarray(mat_flat),
-                np.ascontiguousarray(rhs),
-                workers,
-                False,  # NOTE: this can enable validation in the future
-            )
+        # ... and the solver is called
+        sol, info = solver_func(
+            np.ascontiguousarray(mat_flat),
+            np.ascontiguousarray(rhs),
+            workers,
+        )
 
-            if single_rhs:
-                sol = sol.ravel()
+        # in case of failure, the solver will return NaNs and issue a warning
+        if info > 0:
+            warnings.warn(
+                f"pentapy: {solver_inter.name} solver encountered singular matrix at "
+                f"row index {info - 1}. Returning NaNs."
+            )
+            sol = np.full(shape=rhs_og_shape, fill_value=np.nan)
 
-            return sol
+        # in case of success, the solution can be returned (reshaped if necessary)
+        if single_rhs:
+            sol = sol.ravel()
 
-        except ZeroDivisionError:
-            warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.")
-            return np.full(shape=rhs_og_shape, fill_value=np.nan)
+        return sol
 
     # Case 2: LAPACK's banded solver
     elif solver_inter == pmodels.PentaSolverAliases.LAPACK:
diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index 2fe2843..879dcf6 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -3,7 +3,6 @@ cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
     int* info,
 )
 
@@ -11,6 +10,5 @@ cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
     int* info,
 )
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 53e8cd3..be860c8 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -1,4 +1,4 @@
-# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=False
+# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True
 
 """
 This is a solver linear equation systems with a penta-diagonal matrix,
@@ -24,7 +24,6 @@ def penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
 ):
 
     # NOTE: info is defined to be overwritten for possible future validations
@@ -36,7 +35,6 @@ def penta_solver1(
                 mat_flat,
                 rhs,
                 workers,
-                validate,
                 &info,
             )
         ),
@@ -48,7 +46,6 @@ def penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
 ):
 
     # NOTE: info is defined to be overwritten for possible future validations
@@ -60,7 +57,6 @@ def penta_solver2(
                 mat_flat,
                 rhs,
                 workers,
-                validate,
                 &info,
             )
         ),
@@ -74,7 +70,6 @@ cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
     int* info,
 ):
     """
@@ -100,12 +95,16 @@ cdef double[::, ::1] c_penta_solver1(
     # === Solving the system of equations ===
 
     # first, the matrix is factorized
-    c_penta_factorize_algo_1(
+    info[0] = c_penta_factorize_algo_1(
         &mat_flat[0, 0],
         mat_n_cols,
         &mat_factorized[0, 0],
     )
 
+    # in case of a zero-division, the function exits early
+    if info[0] > 0:
+        return result
+
     # then, all the right-hand sides are solved
     for iter_col in prange(
         rhs_n_cols,
@@ -120,11 +119,10 @@ cdef double[::, ::1] c_penta_solver1(
             &result[0, iter_col],
         )
 
-    info[0] = 0
     return result
 
 
-cdef void c_penta_factorize_algo_1(
+cdef int c_penta_factorize_algo_1(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,
@@ -169,11 +167,19 @@ cdef void c_penta_factorize_algo_1(
 
     # === Factorization ===
 
+    # NOTE: in the following mu is manually checked for zero-division to extract the
+    #       proper value of ``info`` and exit early in case of failure;
+    #       ``info`` is set to the row count where the error occurred as for LAPACK ``pbtrf``
+
     # First row
     mu_i = mat_flat[mat_row_base_idx_2]
+    if mu_i == 0.0:
+        return 1
+
     al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i
     be_i_minus_1 = mat_flat[0] / mu_i
 
+
     mat_factorized[0] = 0.0
     mat_factorized[1] = mu_i
     mat_factorized[2] = 0.0
@@ -183,6 +189,9 @@ cdef void c_penta_factorize_algo_1(
     # Second row
     ga_i = mat_flat[mat_row_base_idx_3 + 1]
     mu_i = mat_flat[mat_row_base_idx_2 + 1] - al_i_minus_1 * ga_i
+    if mu_i == 0.0:
+        return 2
+
     al_i = (mat_flat[mat_row_base_idx_1 + 1] - be_i_minus_1 * ga_i) / mu_i
     be_i = mat_flat[1] / mu_i
 
@@ -198,6 +207,8 @@ cdef void c_penta_factorize_algo_1(
         e_i = mat_flat[mat_row_base_idx_4 + iter_row]
         ga_i = mat_flat[mat_row_base_idx_3 + iter_row] - al_i_minus_1 * e_i
         mu_i = mat_flat[mat_row_base_idx_2 + iter_row] - be_i_minus_1 * e_i - al_i * ga_i
+        if mu_i == 0.0:
+            return iter_row + 1
 
         al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + iter_row] - be_i * ga_i) / mu_i
         al_i_minus_1 = al_i
@@ -219,6 +230,9 @@ cdef void c_penta_factorize_algo_1(
     e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2]
     ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - al_i_minus_1 * e_i
     mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - be_i_minus_1 * e_i - al_i * ga_i
+    if mu_i == 0.0:
+        return mat_n_cols - 1
+
     al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i
 
     mat_factorized[fact_curr_base_idx] = e_i
@@ -231,6 +245,8 @@ cdef void c_penta_factorize_algo_1(
     e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1]
     ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] - al_i * e_i
     mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] - be_i * e_i - al_i_plus_1 * ga_i
+    if mu_i == 0.0:
+        return mat_n_cols
 
     mat_factorized[fact_curr_base_idx + 5] = e_i
     mat_factorized[fact_curr_base_idx + 6] = mu_i
@@ -238,7 +254,7 @@ cdef void c_penta_factorize_algo_1(
     mat_factorized[fact_curr_base_idx + 8] = 0.0
     mat_factorized[fact_curr_base_idx + 9] = 0.0
 
-    return
+    return 0
 
 
 cdef int c_solve_penta_from_factorize_algo_1(
@@ -335,7 +351,6 @@ cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
     int workers,
-    bint validate,
     int* info,
 ):
     """
@@ -361,11 +376,13 @@ cdef double[::, ::1] c_penta_solver2(
     # === Solving the system of equations ===
 
     # first, the matrix is factorized
-    c_penta_factorize_algo_2(
+    info[0] = c_penta_factorize_algo_2(
         &mat_flat[0, 0],
         mat_n_cols,
         &mat_factorized[0, 0],
     )
+    if info[0] > 0:
+        return result
 
     # then, all the right-hand sides are solved
     for iter_col in prange(
@@ -381,10 +398,9 @@ cdef double[::, ::1] c_penta_solver2(
             &result[0, iter_col],
         )
 
-    info[0] = 0
     return result
 
-cdef void c_penta_factorize_algo_2(
+cdef int c_penta_factorize_algo_2(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,
@@ -430,9 +446,16 @@ cdef void c_penta_factorize_algo_2(
 
     # === Factorization ===
 
+    # NOTE: in the following ps is manually checked for zero-division to extract the
+    #       proper value of ``info`` and exit early in case of failure;
+    #       ``info`` is set to the row count where the error occurred as for LAPACK ``pbtrf``
+
     # First row
 
     ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1]
+    if ps_i == 0.0:
+        return mat_n_cols
+
     si_i_plus_1 = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] / ps_i
     phi_i_plus_1 = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] / ps_i
 
@@ -446,6 +469,9 @@ cdef void c_penta_factorize_algo_2(
     # Second row
     rho_i = mat_flat[mat_row_base_idx_1 + mat_n_cols - 2]
     ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - si_i_plus_1 * rho_i
+    if ps_i == 0.0:
+        return mat_n_cols - 1
+
     si_i = (mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - phi_i_plus_1 * rho_i) / ps_i
     phi_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] / ps_i
 
@@ -461,6 +487,9 @@ cdef void c_penta_factorize_algo_2(
         b_i = mat_flat[iter_row]
         rho_i = mat_flat[mat_row_base_idx_1 + iter_row] - si_i_plus_1 * b_i
         ps_i = mat_flat[mat_row_base_idx_2 + iter_row] - phi_i_plus_1 * b_i - si_i * rho_i
+        if ps_i == 0.0:
+            return iter_row + 1
+
         si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + iter_row] - phi_i * rho_i) / ps_i
         si_i_plus_1 = si_i
         si_i = si_i_minus_1
@@ -479,6 +508,9 @@ cdef void c_penta_factorize_algo_2(
     b_i = mat_flat[1]
     rho_i = mat_flat[mat_row_base_idx_1 + 1] - si_i_plus_1 * b_i
     ps_i = mat_flat[mat_row_base_idx_2 + 1] - phi_i_plus_1 * b_i - si_i * rho_i
+    if ps_i == 0.0:
+        return 2
+
     si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + 1] - phi_i * rho_i) / ps_i
     si_i_plus_1 = si_i
     si_i = si_i_minus_1
@@ -493,6 +525,8 @@ cdef void c_penta_factorize_algo_2(
     b_i = mat_flat[0]
     rho_i = mat_flat[mat_row_base_idx_1 + 0] - si_i_plus_1 * b_i
     ps_i = mat_flat[mat_row_base_idx_2 + 0] - phi_i * b_i - si_i * rho_i
+    if ps_i == 0.0:
+        return 1
 
     mat_factorized[4] = b_i
     mat_factorized[3] = rho_i
@@ -500,7 +534,7 @@ cdef void c_penta_factorize_algo_2(
     mat_factorized[1] = 0.0
     mat_factorized[0] = 0.0
 
-    return
+    return 0
 
 
 cdef int c_solve_penta_from_factorize_algo_2(
diff --git a/tests/templates.py b/tests/templates.py
index d67d6cc..0c35226 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -16,7 +16,7 @@
 # === Constants ===
 
 SEED = 19_031_977
-REF_WARNING_CONTENT = "not suitable for input-matrix."
+REF_WARNING_CONTENT = "singular matrix at row index"
 N_ROWS = [
     3,  # important edge case
     4,  # important edge case
@@ -27,12 +27,8 @@
     51,  # odd
     100,  # ...
     101,
-    500,
-    501,
     1_000,
     1_001,
-    5_000,
-    5_001,
 ]
 SOLVER_ALIASES_PTRANS_I = [1, "1", "pTrAnS-I"]
 SOLVER_ALIASES_PTRANS_II = [2, "2", "pTrAnS-Ii"]

From 8db881fa3e2ae1cfd9874180b06c9a4132c17c98 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Mon, 10 Jun 2024 18:43:55 +0200
Subject: [PATCH 51/62] doc: [11] updated outdated index

---
 docs/source/index.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8433404..83ae30f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -94,7 +94,8 @@ The performance plot was created with ``perfplot`` (`link <https://github.com/ns
 Requirements
 ============
 
-- `Numpy >= 1.14.5 <http://www.numpy.org>`_
+- `Numpy >= 1.20.0 <http://www.numpy.org>`_
+- `psutil >= 5.8.0 <https://psutil.readthedocs.io/en/latest/>`_
 
 Optional
 --------

From 37c2fbc615a2d4dea226d1bbf045a534546b8371 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 11 Jun 2024 20:18:24 +0200
Subject: [PATCH 52/62] refactor: [11] made Cython structure smarter for
 algorithm I

---
 src/pentapy/solver.pyx | 108 ++++++++++++++++++++++++++++++++---------
 1 file changed, 86 insertions(+), 22 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index be860c8..c6379e5 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -11,6 +11,8 @@ implemented in Cython.
 import numpy as np
 
 cimport numpy as np
+
+from cython cimport view
 from cython.parallel import prange
 from libc.stdint cimport int64_t
 
@@ -83,35 +85,98 @@ cdef double[::, ::1] c_penta_solver1(
 
     """
 
-    # === Variable declarations ===
+    # --- Initial checks ---
 
-    cdef int64_t mat_n_cols = mat_flat.shape[1]
-    cdef int64_t rhs_n_cols = rhs.shape[1]
-    cdef int64_t iter_col
+    # if the number of columns in the flattened matrix is not equal to the number of
+    # rows in the right-hand side, the function exits early to avoid memory errors
+    if mat_flat.shape[1] != rhs.shape[0]:
+        info[0] = -1
+        return np.empty_like(rhs)
 
-    cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols))
-    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, MAT_FACT_N_COLS))
-
-    # === Solving the system of equations ===
+    # --- Solving the system of equations ---
 
     # first, the matrix is factorized
-    info[0] = c_penta_factorize_algo_1(
-        &mat_flat[0, 0],
-        mat_n_cols,
-        &mat_factorized[0, 0],
+    cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo_1(
+        mat_flat,
+        info,
     )
 
     # in case of a zero-division, the function exits early
     if info[0] > 0:
-        return result
+        return np.empty_like(rhs)
 
     # then, all the right-hand sides are solved
+    return _c_interf_factorize_solve_algo_1(
+        mat_factorized,
+        rhs,
+        workers,
+    )
+
+
+
+cdef double[::, ::1] _c_interf_factorize_algo_1(
+    double[::, ::1] mat_flat,
+    int* info,
+):
+    """
+    This function serves as the interface that takes the memoryview of the flattened
+    matrix and returns the freshly allocated factorized matrix.
+
+    """
+
+    # --- Variable declarations ---
+
+    cdef int64_t mat_n_cols = mat_flat.shape[1]
+    tmp = view.array(
+        shape=(mat_n_cols, MAT_FACT_N_COLS),
+        itemsize=sizeof(double),
+        format="d",
+    )
+    cdef double[::, ::1] mat_factorized = tmp
+
+    # --- Factorization ---
+
+    info[0] = _c_core_factorize_algo_1(
+        &mat_flat[0, 0],
+        mat_n_cols,
+        &mat_factorized[0, 0],
+    )
+
+    return mat_factorized
+
+
+cdef double[::, ::1] _c_interf_factorize_solve_algo_1(
+    double[::, ::1] mat_factorized,
+    double[::, ::1] rhs,
+    int workers,
+):
+    """
+    This function serves as the interface that takes the factorized matrix and the
+    right-hand sides and returns the freshly allocated solution vector obtained by
+    solving the system of equations via backward substitution.
+
+    """
+
+    # --- Variable declarations ---
+
+    cdef int64_t mat_n_cols = mat_factorized.shape[0]
+    cdef int64_t rhs_n_cols = rhs.shape[1]
+    cdef int64_t iter_col
+    tmp = view.array(
+        shape=(mat_n_cols, rhs_n_cols),
+        itemsize=sizeof(double),
+        format="d",
+    )
+    cdef double[::, ::1] result = tmp
+
+    # --- Solving the system of equations ---
+
     for iter_col in prange(
         rhs_n_cols,
         nogil=True,
         num_threads=workers,
     ):
-        c_solve_penta_from_factorize_algo_1(
+        _c_core_factorize_solve_algo_1(
             mat_n_cols,
             &mat_factorized[0, 0],
             &rhs[0, iter_col],
@@ -121,8 +186,7 @@ cdef double[::, ::1] c_penta_solver1(
 
     return result
 
-
-cdef int c_penta_factorize_algo_1(
+cdef int _c_core_factorize_algo_1(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,
@@ -154,7 +218,7 @@ cdef int c_penta_factorize_algo_1(
 
     """
 
-    # === Variable declarations ===
+    # --- Variable declarations ---
 
     cdef int64_t iter_row, fact_curr_base_idx
     cdef int64_t mat_row_base_idx_1 = mat_n_cols  # base index for the second row
@@ -257,7 +321,7 @@ cdef int c_penta_factorize_algo_1(
     return 0
 
 
-cdef int c_solve_penta_from_factorize_algo_1(
+cdef int _c_core_factorize_solve_algo_1(
     int64_t mat_n_cols,
     double* mat_factorized,
     double* rhs_single,
@@ -272,7 +336,7 @@ cdef int c_solve_penta_from_factorize_algo_1(
 
     """
 
-    # === Variable declarations ===
+    # --- Variable declarations ---
 
     cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
     cdef double ze_i, ze_i_minus_1, ze_i_plus_1  # zeta
@@ -364,7 +428,7 @@ cdef double[::, ::1] c_penta_solver2(
 
     """
 
-    # === Variable declarations ===
+    # --- Variable declarations ---
 
     cdef int64_t mat_n_cols = mat_flat.shape[1]
     cdef int64_t rhs_n_cols = rhs.shape[1]
@@ -433,7 +497,7 @@ cdef int c_penta_factorize_algo_2(
 
     """
 
-    # === Variable declarations ===
+    # --- Variable declarations ---
 
     cdef int64_t iter_row, fact_curr_base_idx
     cdef int64_t mat_row_base_idx_1 = mat_n_cols  # base index for the second row
@@ -553,7 +617,7 @@ cdef int c_solve_penta_from_factorize_algo_2(
 
     """
 
-    # === Variable declarations ===
+    # --- Variable declarations ---
 
     cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
     cdef double om_i, om_i_minus_1, om_i_plus_1  # omega

From 896b1ccd67666a7a7c34ce954c0e64d7d26f5203 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 11 Jun 2024 21:25:48 +0200
Subject: [PATCH 53/62] wip: [11] made use of Enums for checks; prepared for
 unifying solver I and II

---
 src/pentapy/solver.pyx | 77 +++++++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index c6379e5..f31debe 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -17,8 +17,19 @@ from cython.parallel import prange
 from libc.stdint cimport int64_t
 
 
+# === Constants ===
+
 cdef enum: MAT_FACT_N_COLS = 5
 
+cdef enum Solvers:
+    PTRRANS_1 = 1
+    PTRRANS_2 = 2
+
+cdef enum Infos:
+    SUCCESS = 0
+    SHAPE_MISMATCH = -1
+    WRONG_SOLVER = -2
+
 # === Main Python Interface ===
 
 
@@ -90,33 +101,37 @@ cdef double[::, ::1] c_penta_solver1(
     # if the number of columns in the flattened matrix is not equal to the number of
     # rows in the right-hand side, the function exits early to avoid memory errors
     if mat_flat.shape[1] != rhs.shape[0]:
-        info[0] = -1
+        info[0] = Infos.SHAPE_MISMATCH
         return np.empty_like(rhs)
 
     # --- Solving the system of equations ---
 
     # first, the matrix is factorized
-    cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo_1(
+    cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo(
         mat_flat,
         info,
+        Solvers.PTRRANS_1,
     )
 
-    # in case of a zero-division, the function exits early
-    if info[0] > 0:
+    # in case of an error during factorization, the function exits early
+    if info[0] != Infos.SUCCESS:
         return np.empty_like(rhs)
 
     # then, all the right-hand sides are solved
-    return _c_interf_factorize_solve_algo_1(
+    return _c_interf_factorize_solve_algo(
         mat_factorized,
         rhs,
         workers,
+        info,
+        Solvers.PTRRANS_1,
     )
 
 
 
-cdef double[::, ::1] _c_interf_factorize_algo_1(
+cdef double[::, ::1] _c_interf_factorize_algo(
     double[::, ::1] mat_flat,
     int* info,
+    int solver,
 ):
     """
     This function serves as the interface that takes the memoryview of the flattened
@@ -136,19 +151,28 @@ cdef double[::, ::1] _c_interf_factorize_algo_1(
 
     # --- Factorization ---
 
-    info[0] = _c_core_factorize_algo_1(
-        &mat_flat[0, 0],
-        mat_n_cols,
-        &mat_factorized[0, 0],
-    )
+    # the solver algorithm is chosen based on the input parameter
+    # Case 1: PTRRANS-I
+    if solver == Solvers.PTRRANS_1:
+        info[0] = _c_core_factorize_algo_1(
+            &mat_flat[0, 0],
+            mat_n_cols,
+            &mat_factorized[0, 0],
+        )
+
+    # Case 3: the wrong solver is chosen
+    else:
+        info[0] = Infos.WRONG_SOLVER
 
     return mat_factorized
 
 
-cdef double[::, ::1] _c_interf_factorize_solve_algo_1(
+cdef double[::, ::1] _c_interf_factorize_solve_algo(
     double[::, ::1] mat_factorized,
     double[::, ::1] rhs,
     int workers,
+    int* info,
+    int solver,
 ):
     """
     This function serves as the interface that takes the factorized matrix and the
@@ -171,18 +195,25 @@ cdef double[::, ::1] _c_interf_factorize_solve_algo_1(
 
     # --- Solving the system of equations ---
 
-    for iter_col in prange(
-        rhs_n_cols,
-        nogil=True,
-        num_threads=workers,
-    ):
-        _c_core_factorize_solve_algo_1(
-            mat_n_cols,
-            &mat_factorized[0, 0],
-            &rhs[0, iter_col],
+    # the solver algorithm is chosen based on the input parameter
+    # Case 1: PTRRANS-I
+    if solver == Solvers.PTRRANS_1:
+        for iter_col in prange(
             rhs_n_cols,
-            &result[0, iter_col],
-        )
+            nogil=True,
+            num_threads=workers,
+        ):
+            info[0] = _c_core_factorize_solve_algo_1(
+                mat_n_cols,
+                &mat_factorized[0, 0],
+                &rhs[0, iter_col],
+                rhs_n_cols,
+                &result[0, iter_col],
+            )
+
+    # Case 3: the wrong solver is chosen
+    else:
+        info[0] = Infos.WRONG_SOLVER
 
     return result
 

From 2a6daf4cda2eecafed6a4ff1eef273dbd4ef0536 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 11 Jun 2024 22:53:04 +0200
Subject: [PATCH 54/62] refactor/tests: [11] fully unified Cython
 implementations of the solvers I and II: - now, they share one common
 processing logic that - calls the solvers it needs depending on which solver
 was called

Besides, a shape check was introduced and the `info`-variable was extended.
All of this is now reflected in `core`, where the solvers and errors are now structured a bit better.
---
 src/pentapy/_models.py                  |  21 ++-
 src/pentapy/core.py                     | 139 +++++++++++++-----
 src/pentapy/errors.py                   |  37 +++++
 src/pentapy/solver.pyx                  | 107 ++++++++------
 tests/templates.py                      | 178 ++++++++++++++++++------
 tests/test_solvers_internal_parallel.py |  52 ++++++-
 tests/test_solvers_internal_serial.py   |  53 ++++++-
 7 files changed, 460 insertions(+), 127 deletions(-)
 create mode 100644 src/pentapy/errors.py

diff --git a/src/pentapy/_models.py b/src/pentapy/_models.py
index c75eb8c..16119cd 100644
--- a/src/pentapy/_models.py
+++ b/src/pentapy/_models.py
@@ -11,12 +11,29 @@
 # === Models ===
 
 
+class Infos(IntEnum):
+    """
+    Defines the possible returns for ``info`` of the low level pentadiagonal solvers,
+    namely
+
+    - ``SUCCESS``: the solver has successfully solved the system
+    - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect
+    - ``WRONG_SOLVER``: the solver alias is incorrect on C-level
+        (internal error, should not occur)
+
+    """
+
+    SUCCESS = 0
+    SHAPE_MISMATCH = -1
+    WRONG_SOLVER = -2
+
+
 class PentaSolverAliases(IntEnum):
     """
     Defines all available solver aliases for pentadiagonal systems, namely
 
-    - ``PTRANS_I``: The PTRANS-I algorithm
-    - ``PTRANS_II``: The PTRANS-II algorithm
+    - ``PTRANS_I``: the PTRANS-I algorithm
+    - ``PTRANS_II``: the PTRANS-II algorithm
     - ``LAPACK``: Scipy's LAPACK solver :func:`scipy.linalg.solve_banded`
     - ``SUPER_LU``: Scipy's SuperLU solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
     - ``UMFPACK``: Scipy's UMFpack solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)`
diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index f8134ac..da9fcbe 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -11,9 +11,52 @@
 import psutil
 
 from pentapy import _models as pmodels
+from pentapy import errors as perrors
 from pentapy import solver as psolver  # type: ignore
 from pentapy import tools as ptools
 
+# === Auxiliary functions ===
+
+
+def _get_num_workers(workers: int) -> int:
+    """
+    Gets the number of available workers for the solver.
+
+    Parameters
+    ----------
+    workers : :class:`int`
+        Number of workers requested.
+
+    Returns
+    -------
+    workers : :class:`int`
+        Number of workers available.
+
+    """
+
+    if workers < -1:
+        raise ValueError(
+            perrors.PentaPyErrorMessages.WRONG_WORKERS.format(workers=workers)
+        )
+
+    if workers == -1:
+        # NOTE: the following will be overwritten by the number of available threads
+        workers = 999_999_999_999_999_999_999_999_999
+
+    # the number of workers is limited between 1 and the number of available threads
+    # NOTE: the following does not count the number of total threads, but the number of
+    #       threads available for the solver
+    proc = psutil.Process()
+    workers = min(
+        workers,
+        len(proc.cpu_affinity()),  # type: ignore
+    )
+    workers = max(workers, 1)
+    del proc
+
+    return workers
+
+
 # === Solver ===
 
 
@@ -104,6 +147,14 @@ def solve(
     result : :class:`numpy.ndarray` of shape (m,) or (m, n)
         Solution of the equation system with the same shape as ``rhs``.
 
+    Raises
+    ------
+    ValueError
+        If the number of workers is incorrect.
+    ValueError
+        If there is a shape mismatch between the number of equations in the left-hand
+        side matrix and the number of rows in the right-hand side(s).
+
     """
 
     # first, the solver is converted to the internal name to avoid confusion
@@ -132,35 +183,28 @@ def solve(
         # NOTE: this avoids memory leakage in the Cython-solver that will iterate over
         #       at least 4 rows/columns no matter what
         if mat_flat.shape[1] == 3:
-            return np.linalg.solve(
-                a=ptools.create_full(mat_flat, col_wise=False),
-                b=rhs,
-            )
+            if not mat_flat.shape[1] == rhs.shape[0]:
+                raise ValueError(
+                    perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format(
+                        lhs_n_cols=mat_flat.shape[1],
+                        rhs_n_rows=rhs.shape[0],
+                    )
+                )
+
+            try:
+                return np.linalg.solve(
+                    a=ptools.create_full(mat_flat, col_wise=False),
+                    b=rhs,
+                )
+            except np.linalg.LinAlgError:
+                warnings.warn(
+                    "pentapy: NumPy LAPACK dense solver encountered singular matrix."
+                )
+                return np.full(shape=rhs.shape, fill_value=np.nan)
 
         # now, the number of workers for multithreading has to be determined if
         # necessary
-        # NOTE: the following does not count the number of total threads, but the number
-        #       of threads available for the solver
-        if workers < -1:
-            raise ValueError(
-                f"pentapy.solve: workers has to be -1 or greater, not {workers=}"
-            )
-
-        if workers == -1:
-            # NOTE: the following will be overwritten by the number of available threads
-            workers = 999_999_999_999_999_999_999_999_999
-
-        elif workers == 0:
-            workers = 1
-
-        # the number of workers is limited to the number of available threads
-        proc = psutil.Process()
-        workers = min(
-            workers,
-            len(proc.cpu_affinity()),  # type: ignore
-        )
-        workers = max(workers, 1)
-        del proc
+        workers = _get_num_workers(workers)
 
         # if there is only a single right-hand side, it has to be reshaped to a 2D array
         # NOTE: this has to be reverted at the end
@@ -183,19 +227,44 @@ def solve(
             workers,
         )
 
-        # in case of failure, the solver will return NaNs and issue a warning
-        if info > 0:
+        print(f"{info=}")
+
+        # in case of success, the solution can be returned (reshaped if necessary)
+        if info == pmodels.Infos.SUCCESS:
+            if single_rhs:
+                sol = sol.ravel()
+
+            return sol
+
+        # in case of a shape mismatch, an error will be raised
+        if info == pmodels.Infos.SHAPE_MISMATCH:
+            raise ValueError(
+                perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format(
+                    lhs_n_cols=mat_flat.shape[1],
+                    rhs_n_rows=rhs_og_shape[0],
+                )
+            )
+
+        # in case of a zero-division, the solver will return NaNs and issue a warning
+        elif info > pmodels.Infos.SUCCESS:
             warnings.warn(
-                f"pentapy: {solver_inter.name} solver encountered singular matrix at "
-                f"row index {info - 1}. Returning NaNs."
+                perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format(
+                    solver_inter_name=solver_inter.name,
+                    row_idx=info - 1,
+                )
             )
-            sol = np.full(shape=rhs_og_shape, fill_value=np.nan)
 
-        # in case of success, the solution can be returned (reshaped if necessary)
-        if single_rhs:
-            sol = sol.ravel()
+            return np.full(shape=rhs_og_shape, fill_value=np.nan)
 
-        return sol
+        # in case of an internal error in determination of the solver, an error will be
+        # raised
+        elif info == pmodels.Infos.WRONG_SOLVER:  # pragma: no cover
+            raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER)
+
+        # in case of an unknown error, an error will be raised
+        raise AssertionError(  # pragma: no cover
+            perrors.PentaPyErrorMessages.UNKNOWN_ERROR
+        )
 
     # Case 2: LAPACK's banded solver
     elif solver_inter == pmodels.PentaSolverAliases.LAPACK:
diff --git a/src/pentapy/errors.py b/src/pentapy/errors.py
new file mode 100644
index 0000000..0974816
--- /dev/null
+++ b/src/pentapy/errors.py
@@ -0,0 +1,37 @@
+"""
+Auxiliary errors for the pentapy package.
+
+"""
+
+# === Imports ===
+
+from enum import Enum
+
+
+class PentaPyErrorMessages(str, Enum):
+    """
+    Defines the possible error messages for the pentapy package, namely
+
+    - ``WRONG_WORKERS``: the number of workers is incorrect
+    - ``SINGULAR_MATRIX``: the matrix is singular
+    - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect
+    - ``WRONG_SOLVER``: the solver alias is incorrect on C-level (internal error,
+        should not occur)
+    - ``UNKNOWN_ERROR``: an unknown error occurred
+
+    """
+
+    WRONG_WORKERS = (
+        "pentapy.solve: workers has to be -1 or greater, but got workers={workers}"
+    )
+    SINGULAR_MATRIX = (
+        "pentapy: {solver_inter_name} solver encountered singular matrix at "
+        "row index {row_idx}. Returning NaNs."
+    )
+    SHAPE_MISMATCH = (
+        "pentapy.solve: shape mismatch between the number of equations in the "
+        "left-hand side matrix ({lhs_n_cols}) and the number of right-hand sides "
+        "({rhs_n_rows})."
+    )
+    WRONG_SOLVER = "pentapy.solve: failure in determining the solver internally."
+    UNKNOWN_ERROR = "pentapy.solve: unknown error in the pentadiagonal solver."
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index f31debe..32fdf77 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -107,7 +107,7 @@ cdef double[::, ::1] c_penta_solver1(
     # --- Solving the system of equations ---
 
     # first, the matrix is factorized
-    cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo(
+    cdef double[::, ::1] mat_factorized = _c_interf_factorize(
         mat_flat,
         info,
         Solvers.PTRRANS_1,
@@ -118,7 +118,7 @@ cdef double[::, ::1] c_penta_solver1(
         return np.empty_like(rhs)
 
     # then, all the right-hand sides are solved
-    return _c_interf_factorize_solve_algo(
+    return _c_interf_factorize_solve(
         mat_factorized,
         rhs,
         workers,
@@ -128,7 +128,7 @@ cdef double[::, ::1] c_penta_solver1(
 
 
 
-cdef double[::, ::1] _c_interf_factorize_algo(
+cdef double[::, ::1] _c_interf_factorize(
     double[::, ::1] mat_flat,
     int* info,
     int solver,
@@ -159,15 +159,24 @@ cdef double[::, ::1] _c_interf_factorize_algo(
             mat_n_cols,
             &mat_factorized[0, 0],
         )
+        return mat_factorized
+
+    # Case 2: PTRRANS-II
+    elif solver == Solvers.PTRRANS_2:
+        info[0] = _c_core_factorize_algo_2(
+            &mat_flat[0, 0],
+            mat_n_cols,
+            &mat_factorized[0, 0],
+        )
+        return mat_factorized
 
     # Case 3: the wrong solver is chosen
     else:
         info[0] = Infos.WRONG_SOLVER
-
-    return mat_factorized
+        return mat_factorized
 
 
-cdef double[::, ::1] _c_interf_factorize_solve_algo(
+cdef double[::, ::1] _c_interf_factorize_solve(
     double[::, ::1] mat_factorized,
     double[::, ::1] rhs,
     int workers,
@@ -211,11 +220,30 @@ cdef double[::, ::1] _c_interf_factorize_solve_algo(
                 &result[0, iter_col],
             )
 
+        return result
+
+    # Case 2: PTRRANS-II
+    elif solver == Solvers.PTRRANS_2:
+        for iter_col in prange(
+            rhs_n_cols,
+            nogil=True,
+            num_threads=workers,
+        ):
+            info[0] = _c_core_factorize_solve_algo_2(
+                mat_n_cols,
+                &mat_factorized[0, 0],
+                &rhs[0, iter_col],
+                rhs_n_cols,
+                &result[0, iter_col],
+            )
+
+        return result
+
     # Case 3: the wrong solver is chosen
     else:
         info[0] = Infos.WRONG_SOLVER
+        return result
 
-    return result
 
 cdef int _c_core_factorize_algo_1(
     double* mat_flat,
@@ -260,7 +288,7 @@ cdef int _c_core_factorize_algo_1(
     cdef double al_i, al_i_minus_1, al_i_plus_1  # alpha
     cdef double be_i, be_i_minus_1, be_i_plus_1  # beta
 
-    # === Factorization ===
+    # --- Factorization ---
 
     # NOTE: in the following mu is manually checked for zero-division to extract the
     #       proper value of ``info`` and exit early in case of failure;
@@ -372,7 +400,7 @@ cdef int _c_core_factorize_solve_algo_1(
     cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
     cdef double ze_i, ze_i_minus_1, ze_i_plus_1  # zeta
 
-    # === Transformation ===
+    # --- Transformation ---
 
     # first, the right-hand side is transformed into the vector ``zeta``
     # First row
@@ -419,7 +447,7 @@ cdef int _c_core_factorize_solve_algo_1(
     ) / mat_factorized[fact_curr_base_idx + 6]
     result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1
 
-    # === Backward substitution ===
+    # --- Backward substitution ---
 
     # The solution vector is calculated by backward substitution that overwrites the
     # right-hand side vector with the solution vector
@@ -459,43 +487,38 @@ cdef double[::, ::1] c_penta_solver2(
 
     """
 
-    # --- Variable declarations ---
-
-    cdef int64_t mat_n_cols = mat_flat.shape[1]
-    cdef int64_t rhs_n_cols = rhs.shape[1]
-    cdef int64_t iter_col
+    # --- Initial checks ---
 
-    cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols))
-    cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, 5))
+    # if the number of columns in the flattened matrix is not equal to the number of
+    # rows in the right-hand side, the function exits early to avoid memory errors
+    if mat_flat.shape[1] != rhs.shape[0]:
+        info[0] = Infos.SHAPE_MISMATCH
+        return np.empty_like(rhs)
 
-    # === Solving the system of equations ===
+    # --- Solving the system of equations ---
 
     # first, the matrix is factorized
-    info[0] = c_penta_factorize_algo_2(
-        &mat_flat[0, 0],
-        mat_n_cols,
-        &mat_factorized[0, 0],
+    cdef double[::, ::1] mat_factorized = _c_interf_factorize(
+        mat_flat,
+        info,
+        Solvers.PTRRANS_2,
     )
-    if info[0] > 0:
-        return result
+
+    # in case of an error during factorization, the function exits early
+    if info[0] != Infos.SUCCESS:
+        return np.empty_like(rhs)
 
     # then, all the right-hand sides are solved
-    for iter_col in prange(
-        rhs_n_cols,
-        nogil=True,
-        num_threads=workers,
-    ):
-        c_solve_penta_from_factorize_algo_2(
-            mat_n_cols,
-            &mat_factorized[0, 0],
-            &rhs[0, iter_col],
-            rhs_n_cols,
-            &result[0, iter_col],
-        )
+    return _c_interf_factorize_solve(
+        mat_factorized,
+        rhs,
+        workers,
+        info,
+        Solvers.PTRRANS_2,
+    )
 
-    return result
 
-cdef int c_penta_factorize_algo_2(
+cdef int _c_core_factorize_algo_2(
     double* mat_flat,
     int64_t mat_n_cols,
     double* mat_factorized,
@@ -539,7 +562,7 @@ cdef int c_penta_factorize_algo_2(
     cdef double si_i, si_i_minus_1, si_i_plus_1  # sigma
     cdef double phi_i, phi_i_minus_1, phi_i_plus_1  # phi
 
-    # === Factorization ===
+    # --- Factorization ---
 
     # NOTE: in the following ps is manually checked for zero-division to extract the
     #       proper value of ``info`` and exit early in case of failure;
@@ -632,7 +655,7 @@ cdef int c_penta_factorize_algo_2(
     return 0
 
 
-cdef int c_solve_penta_from_factorize_algo_2(
+cdef int _c_core_factorize_solve_algo_2(
     int64_t mat_n_cols,
     double* mat_factorized,
     double* rhs_single,
@@ -653,7 +676,7 @@ cdef int c_solve_penta_from_factorize_algo_2(
     cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx
     cdef double om_i, om_i_minus_1, om_i_plus_1  # omega
 
-    # === Transformation ===
+    # --- Transformation ---
 
     # first, the right-hand side is transformed into the vector ``omega``
     # First row
@@ -708,7 +731,7 @@ cdef int c_solve_penta_from_factorize_algo_2(
     ) / mat_factorized[2]
     result_view[0] = om_i_minus_1
 
-    # === Forward substitution ===
+    # --- Forward substitution ---
 
     # The solution vector is calculated by forward substitution that overwrites the
     # right-hand side vector with the solution vector
diff --git a/tests/templates.py b/tests/templates.py
index 0c35226..2be97e1 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -5,18 +5,18 @@
 
 # === Imports ===
 
-from typing import Literal
+from typing import Dict, Literal
 
 import numpy as np
+import pentapy as pp
 import pytest
 import util_funcs as uf
 
-import pentapy as pp
-
 # === Constants ===
 
 SEED = 19_031_977
-REF_WARNING_CONTENT = "singular matrix at row index"
+SINGULAR_WARNING_REF_CONTENT = "singular matrix at row index"
+SHAPE_MISMATCH_ERROR_REF_CONTENT = "shape mismatch between the number of equations"
 N_ROWS = [
     3,  # important edge case
     4,  # important edge case
@@ -43,10 +43,69 @@
     "workers": [1],
 }
 
+# === Auxiliary functions ===
+
+
+def convert_matrix_to_layout(
+    mat: np.ndarray,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+) -> tuple[np.ndarray, Dict[str, bool]]:
+    """
+    Converts a dense pentadiagonal matrix to the desired layout.
+
+    """
+
+    if input_layout == "full":
+        return (
+            mat,
+            dict(is_flat=False),
+        )
+
+    elif input_layout == "banded_row_wise":
+        return (
+            pp.create_banded(mat, col_wise=False),
+            dict(
+                is_flat=True,
+                index_row_wise=True,
+            ),
+        )
+
+    elif input_layout == "banded_col_wise":
+        return (
+            pp.create_banded(mat, col_wise=True),
+            dict(
+                is_flat=True,
+                index_row_wise=False,
+            ),
+        )
+
+    else:
+        raise ValueError(f"Invalid input layout: {input_layout}")
+
+
+def convert_matrix_to_order(
+    mat: np.ndarray,
+    from_order: Literal["C", "F"],
+) -> np.ndarray:
+    """
+    Converts a dense pentadiagonal matrix to the desired order.
+
+    """
+
+    if from_order == "C":
+        return np.ascontiguousarray(mat)
+
+    elif from_order == "F":
+        return np.asfortranarray(mat)
+
+    else:
+        raise ValueError(f"Invalid from order: {from_order=}")
+
+
 # === Templates ===
 
 
-def pentapy_solvers_template(
+def pentapy_solvers_extended_template(
     n_rows: int,
     n_rhs: int,
     input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
@@ -65,9 +124,9 @@ def pentapy_solvers_template(
     workers: int,
 ) -> None:
     """
-    Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from
-    different input layouts, number of right-hand sides, number of rows, and also
-    when inducing an error by making the first diagonal element zero.
+    Tests the pentadiagonal solvers when starting from different input layouts, number
+    of right-hand sides, number of rows, and also when inducing an error by making the
+    first or last diagonal element exactly zero.
     It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
 
     """
@@ -104,42 +163,17 @@ def pentapy_solvers_template(
         result_shape = (n_rows,)
 
     # the matrix is converted to the desired layout
-    if input_layout == "full":
-        mat = mat_full
-        kwargs = dict(is_flat=False)
-
-    elif input_layout == "banded_row_wise":
-        mat = pp.create_banded(mat_full, col_wise=False)
-        kwargs = dict(
-            is_flat=True,
-            index_row_wise=True,
-        )
-
-    elif input_layout == "banded_col_wise":
-        mat = pp.create_banded(mat_full, col_wise=True)
-        kwargs = dict(
-            is_flat=True,
-            index_row_wise=False,
-        )
-
-    else:
-        raise ValueError(f"Invalid input layout: {input_layout}")
+    mat, kwargs = convert_matrix_to_layout(mat_full, input_layout)
 
-    # the matrix is converted to the desired order
-    if from_order == "C":
-        mat = np.ascontiguousarray(mat)
-        rhs = np.ascontiguousarray(rhs)
-    elif from_order == "F":
-        mat = np.asfortranarray(mat)
-        rhs = np.asfortranarray(rhs)
-    else:
-        raise ValueError(f"Invalid from order: {from_order=}")
+    # the left-hand side matrix and right-hand side is converted to the desired order
+    mat = convert_matrix_to_order(mat=mat, from_order=from_order)
+    rhs = convert_matrix_to_order(mat=rhs, from_order=from_order)
 
     # the solution is computed
     # Case 1: in case of an error, a warning has to be issued and the result has to
     # be NaN
     if induce_error:
-        with pytest.warns(UserWarning, match=REF_WARNING_CONTENT):
+        with pytest.warns(UserWarning, match=SINGULAR_WARNING_REF_CONTENT):
             mat_ref_copy = mat.copy()
             sol = pp.solve(
                 mat=mat,
@@ -148,9 +182,10 @@ def pentapy_solvers_template(
                 workers=workers,
                 **kwargs,
             )
-            assert sol.shape == result_shape
-            assert np.isnan(sol).all()
-            assert np.array_equal(mat, mat_ref_copy)
+
+        assert sol.shape == result_shape
+        assert np.isnan(sol).all()
+        assert np.array_equal(mat, mat_ref_copy)
 
         return
 
@@ -174,3 +209,64 @@ def pentapy_solvers_template(
 
     # the solutions are compared
     assert np.allclose(sol, sol_ref)
+
+    return
+
+
+def pentapy_solvers_shape_mismatch_template(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
+    from_order: Literal["C", "F"],
+    workers: int,
+) -> None:
+    """
+    Tests the pentadiagonal solvers when the shape of the right-hand side is incorrect,
+    starting from different input layouts, number of right-hand sides, and number of
+    rows.
+
+    """
+
+    # first, a random pentadiagonal matrix is generated
+    mat_full = uf.gen_conditioned_rand_penta_matrix_dense(
+        n_rows=n_rows,
+        seed=SEED,
+        ill_conditioned=False,
+    )
+
+    # the right-hand side is generated with a wrong shape (rows + 10)
+    np.random.seed(SEED)
+    if n_rhs is not None:
+        rhs = np.random.rand(n_rows + 10, n_rhs)
+    else:
+        rhs = np.random.rand(n_rows + 10)
+
+    # the matrix is converted to the desired layout
+    mat, kwargs = convert_matrix_to_layout(mat_full, input_layout)
+
+    # the left-hand side matrix and right-hand side are converted to the desired order
+    mat = convert_matrix_to_order(mat=mat, from_order=from_order)
+    rhs = convert_matrix_to_order(mat=rhs, from_order=from_order)
+
+    # the solution is computed, but due to the wrong shape of the right-hand side, an
+    # error has to be raised
+    with pytest.raises(ValueError, match=SHAPE_MISMATCH_ERROR_REF_CONTENT):
+        pp.solve(
+            mat=mat,
+            rhs=rhs,
+            solver=solver_alias,  # type: ignore
+            workers=workers,
+            **kwargs,
+        )
+
+    return
diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py
index d091d8d..3775ffa 100644
--- a/tests/test_solvers_internal_parallel.py
+++ b/tests/test_solvers_internal_parallel.py
@@ -22,6 +22,8 @@
 param_dict["from_order"] = ["C"]
 param_dict["workers"] = [-1]
 
+# --- Extended solve test ---
+
 
 def test_pentapy_solvers_parallel(
     n_rows: int,
@@ -42,7 +44,7 @@ def test_pentapy_solvers_parallel(
     workers: int,
 ) -> None:
 
-    templates.pentapy_solvers_template(
+    templates.pentapy_solvers_extended_template(
         n_rows=n_rows,
         n_rhs=n_rhs,
         input_layout=input_layout,
@@ -59,6 +61,9 @@ def test_pentapy_solvers_parallel(
     )
 
 
+# --- Different workers test ---
+
+
 @pytest.mark.parametrize(
     "workers, expected", [(0, None), (1, None), (-1, None), (-2, ValueError)]
 )
@@ -83,9 +88,50 @@ def test_pentapy_solvers_parallel_different_workers(
     # Case 1: the test should fail
     if expected is not None:
         with pytest.raises(expected):
-            templates.pentapy_solvers_template(**kwargs)  # type: ignore
+            templates.pentapy_solvers_extended_template(**kwargs)  # type: ignore
 
         return
 
     # Case 2: the test should pass
-    templates.pentapy_solvers_template(**kwargs)  # type: ignore
+    templates.pentapy_solvers_extended_template(**kwargs)  # type: ignore
+
+
+# --- Shape mismatch test ---
+
+
+def test_pentapy_solvers_shape_mismatch_parallel(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
+    from_order: Literal["C", "F"],
+    workers: int,
+) -> None:
+
+    templates.pentapy_solvers_shape_mismatch_template(
+        n_rows=n_rows,
+        n_rhs=n_rhs,
+        input_layout=input_layout,
+        solver_alias=solver_alias,
+        from_order=from_order,
+        workers=workers,
+    )
+
+
+params_dict_without_induce_error = deepcopy(templates.PARAM_DICT)
+params_dict_without_induce_error["workers"] = [-1]
+params_dict_without_induce_error.pop("induce_error")
+
+for key, value in params_dict_without_induce_error.items():
+    test_pentapy_solvers_shape_mismatch_parallel = pytest.mark.parametrize(key, value)(
+        test_pentapy_solvers_shape_mismatch_parallel
+    )
diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py
index 6bed962..f1248f8 100644
--- a/tests/test_solvers_internal_serial.py
+++ b/tests/test_solvers_internal_serial.py
@@ -8,6 +8,7 @@
 
 # === Imports ===
 
+from copy import deepcopy
 from typing import Literal
 
 import pytest
@@ -19,7 +20,10 @@
 # based on either Algorithm PTRANS-I or PTRANS-II in serial mode
 
 
-def test_pentapy_solvers_serial(
+# --- Extended solve test ---
+
+
+def test_pentapy_solvers_extended_serial(
     n_rows: int,
     n_rhs: int,
     input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
@@ -38,7 +42,7 @@ def test_pentapy_solvers_serial(
     workers: int,
 ) -> None:
 
-    templates.pentapy_solvers_template(
+    templates.pentapy_solvers_extended_template(
         n_rows=n_rows,
         n_rhs=n_rhs,
         input_layout=input_layout,
@@ -50,6 +54,47 @@ def test_pentapy_solvers_serial(
 
 
 for key, value in templates.PARAM_DICT.items():
-    test_pentapy_solvers_serial = pytest.mark.parametrize(key, value)(
-        test_pentapy_solvers_serial
+    test_pentapy_solvers_extended_serial = pytest.mark.parametrize(key, value)(
+        test_pentapy_solvers_extended_serial
+    )
+
+
+# --- Shape mismatch test ---
+
+
+def test_pentapy_solvers_shape_mismatch_serial(
+    n_rows: int,
+    n_rhs: int,
+    input_layout: Literal["full", "banded_row_wise", "banded_col_wise"],
+    solver_alias: Literal[
+        1,
+        "1",
+        "PTRANS-I",
+        "pTrAnS-I",
+        2,
+        "2",
+        "PTRANS-II",
+        "pTrAnS-Ii",
+    ],
+    from_order: Literal["C", "F"],
+    workers: int,
+) -> None:
+
+    templates.pentapy_solvers_shape_mismatch_template(
+        n_rows=n_rows,
+        n_rhs=n_rhs,
+        input_layout=input_layout,
+        solver_alias=solver_alias,
+        from_order=from_order,
+        workers=workers,
+    )
+
+
+params_dict_without_induce_error = deepcopy(templates.PARAM_DICT)
+params_dict_without_induce_error.pop("induce_error")
+
+
+for key, value in params_dict_without_induce_error.items():
+    test_pentapy_solvers_shape_mismatch_serial = pytest.mark.parametrize(key, value)(
+        test_pentapy_solvers_shape_mismatch_serial
     )

From 549422d13fbb749a98e70abc5c307dbf8191d25c Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 11 Jun 2024 23:14:17 +0200
Subject: [PATCH 55/62] refactor/tests: [11] split up cluttered ptrans-solving
 into dedicated auxiliary functions in `core`; added NumPy-solver error case
 test

---
 src/pentapy/core.py | 265 +++++++++++++++++++++++++++-----------------
 tests/templates.py  |  11 +-
 2 files changed, 171 insertions(+), 105 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index da9fcbe..df1d152 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -57,7 +57,165 @@ def _get_num_workers(workers: int) -> int:
     return workers
 
 
-# === Solver ===
+def _raise_ptrans_or_numpy_shape_mismatch_error(
+    mat_n_cols: int,
+    rhs_n_rows: int,
+) -> None:
+    """
+    Raises a shape mismatch error for the PTRANS solver or the NumPy dense solver.
+
+    """
+
+    raise ValueError(
+        perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format(
+            lhs_n_cols=mat_n_cols,
+            rhs_n_rows=rhs_n_rows,
+        )
+    )
+
+
+def _handle_ptrans_info_complete_fail_cases(
+    info: int,
+    mat_n_cols: int,
+    rhs_n_rows: int,
+) -> None:
+    """
+    Handles the cases where the PTRANS solver failed completely by raising an error.
+
+    """
+
+    # Case 1: shape mismatch
+    if info == pmodels.Infos.SHAPE_MISMATCH:
+        _raise_ptrans_or_numpy_shape_mismatch_error(
+            mat_n_cols=mat_n_cols,
+            rhs_n_rows=rhs_n_rows,
+        )
+
+    # Case 2: wrong solver
+    elif info == pmodels.Infos.WRONG_SOLVER:  # pragma: no cover
+        raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER)
+
+    # Case 3: unknown error
+    # pragma: no cover
+    raise AssertionError(perrors.PentaPyErrorMessages.UNKNOWN_ERROR)
+
+
+# === Auxiliary Solver Interfaces ===
+
+
+def _solve_with_numpy(
+    mat_flat: np.ndarray,
+    rhs: np.ndarray,
+) -> np.ndarray:
+    """
+    Solver for a pentadiagonal system using NumPy's dense LAPACK solver.
+
+    """
+
+    # in case of a shape mismatch, an error will be raised
+    if not mat_flat.shape[1] == rhs.shape[0]:
+        _raise_ptrans_or_numpy_shape_mismatch_error(
+            mat_n_cols=mat_flat.shape[1],
+            rhs_n_rows=rhs.shape[0],
+        )
+
+    # then, the system is solved using NumPy's dense solver
+    try:
+        return np.linalg.solve(
+            a=ptools.create_full(mat_flat, col_wise=False),
+            b=rhs,
+        )
+
+    # in case of a singular matrix, a warning will be issued and NaNs will be returned
+    except np.linalg.LinAlgError:
+        warnings.warn("pentapy: NumPy LAPACK dense solver encountered singular matrix.")
+        return np.full(shape=rhs.shape, fill_value=np.nan)
+
+
+def _solve_with_ptrans(
+    mat: np.ndarray,
+    rhs: np.ndarray,
+    is_flat: bool,
+    index_row_wise: bool,
+    workers: int,
+    solver_inter: pmodels.PentaSolverAliases,
+) -> np.ndarray:  # type: ignore
+    """
+    Solver for a pentadiagonal system using one of the PTRANS algorithms.
+
+    """
+
+    # the matrix is checked and shifted if necessary ...
+    if is_flat and index_row_wise:
+        mat_flat = np.asarray(mat, dtype=np.double)
+        ptools._check_penta(mat_flat)
+    elif is_flat:
+        mat_flat = np.array(mat, dtype=np.double)  # NOTE: this is a copy
+        ptools._check_penta(mat_flat)
+        ptools.shift_banded(mat_flat, copy=False)
+    else:
+        mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
+
+    # ... followed by the conversion of the right-hand side
+    rhs = np.asarray(rhs, dtype=np.double)
+
+    # Special case: Early exit when the matrix has only 3 rows/columns
+    # NOTE: this avoids memory leakage in the Cython-solver that will iterate over
+    #       at least 4 rows/columns no matter what
+    if mat_flat.shape[1] == 3:
+        return _solve_with_numpy(mat_flat=mat_flat, rhs=rhs)
+
+    # now, the number of workers for multithreading has to be determined if necessary
+    workers = _get_num_workers(workers)
+
+    # if there is only a single right-hand side, it has to be reshaped to a 2D array
+    # NOTE: this has to be reverted at the end
+    single_rhs = rhs.ndim == 1
+    rhs_og_shape = rhs.shape
+    if single_rhs:
+        rhs = rhs[:, np.newaxis]
+
+    # the respective solver is chosen ...
+    solver_func = (
+        psolver.penta_solver1
+        if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
+        else psolver.penta_solver2
+    )
+
+    # ... and the solver is called
+    sol, info = solver_func(
+        np.ascontiguousarray(mat_flat),
+        np.ascontiguousarray(rhs),
+        workers,
+    )
+
+    # in case of success, the solution can be returned (reshaped if necessary)
+    if info == pmodels.Infos.SUCCESS:
+        if single_rhs:
+            sol = sol.ravel()
+
+        return sol
+
+    # in case of a singular matrix, a warning will be issued and NaNs will be returned
+    elif info > pmodels.Infos.SUCCESS:
+        warnings.warn(
+            perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format(
+                solver_inter_name=solver_inter.name,
+                row_idx=info - 1,
+            )
+        )
+
+        return np.full(shape=rhs_og_shape, fill_value=np.nan)
+
+    # in case of an error, the respective error will be raised
+    _handle_ptrans_info_complete_fail_cases(
+        info=info,
+        mat_n_cols=mat_flat.shape[1],
+        rhs_n_rows=rhs_og_shape[0],
+    )
+
+
+# === Main Solver Interface ===
 
 
 def solve(
@@ -165,105 +323,14 @@ def solve(
         pmodels.PentaSolverAliases.PTRANS_I,
         pmodels.PentaSolverAliases.PTRANS_II,
     }:
-        # the matrix is checked and shifted if necessary ...
-        if is_flat and index_row_wise:
-            mat_flat = np.asarray(mat, dtype=np.double)
-            ptools._check_penta(mat_flat)
-        elif is_flat:
-            mat_flat = np.array(mat, dtype=np.double)  # NOTE: this is a copy
-            ptools._check_penta(mat_flat)
-            ptools.shift_banded(mat_flat, copy=False)
-        else:
-            mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
-
-        # ... followed by the conversion of the right-hand side
-        rhs = np.asarray(rhs, dtype=np.double)
-
-        # Special case: Early exit when the matrix has only 3 rows/columns
-        # NOTE: this avoids memory leakage in the Cython-solver that will iterate over
-        #       at least 4 rows/columns no matter what
-        if mat_flat.shape[1] == 3:
-            if not mat_flat.shape[1] == rhs.shape[0]:
-                raise ValueError(
-                    perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format(
-                        lhs_n_cols=mat_flat.shape[1],
-                        rhs_n_rows=rhs.shape[0],
-                    )
-                )
-
-            try:
-                return np.linalg.solve(
-                    a=ptools.create_full(mat_flat, col_wise=False),
-                    b=rhs,
-                )
-            except np.linalg.LinAlgError:
-                warnings.warn(
-                    "pentapy: NumPy LAPACK dense solver encountered singular matrix."
-                )
-                return np.full(shape=rhs.shape, fill_value=np.nan)
-
-        # now, the number of workers for multithreading has to be determined if
-        # necessary
-        workers = _get_num_workers(workers)
-
-        # if there is only a single right-hand side, it has to be reshaped to a 2D array
-        # NOTE: this has to be reverted at the end
-        single_rhs = rhs.ndim == 1
-        rhs_og_shape = rhs.shape
-        if single_rhs:
-            rhs = rhs[:, np.newaxis]
-
-        # the respective solver is chosen ...
-        solver_func = (
-            psolver.penta_solver1
-            if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
-            else psolver.penta_solver2
-        )
-
-        # ... and the solver is called
-        sol, info = solver_func(
-            np.ascontiguousarray(mat_flat),
-            np.ascontiguousarray(rhs),
-            workers,
-        )
-
-        print(f"{info=}")
-
-        # in case of success, the solution can be returned (reshaped if necessary)
-        if info == pmodels.Infos.SUCCESS:
-            if single_rhs:
-                sol = sol.ravel()
-
-            return sol
-
-        # in case of a shape mismatch, an error will be raised
-        if info == pmodels.Infos.SHAPE_MISMATCH:
-            raise ValueError(
-                perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format(
-                    lhs_n_cols=mat_flat.shape[1],
-                    rhs_n_rows=rhs_og_shape[0],
-                )
-            )
-
-        # in case of a zero-division, the solver will return NaNs and issue a warning
-        elif info > pmodels.Infos.SUCCESS:
-            warnings.warn(
-                perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format(
-                    solver_inter_name=solver_inter.name,
-                    row_idx=info - 1,
-                )
-            )
-
-            return np.full(shape=rhs_og_shape, fill_value=np.nan)
-
-        # in case of an internal error in determination of the solver, an error will be
-        # raised
-        elif info == pmodels.Infos.WRONG_SOLVER:  # pragma: no cover
-            raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER)
 
-        # in case of an unknown error, an error will be raised
-        raise AssertionError(  # pragma: no cover
-            perrors.PentaPyErrorMessages.UNKNOWN_ERROR
+        return _solve_with_ptrans(
+            mat=mat,
+            rhs=rhs,
+            is_flat=is_flat,
+            index_row_wise=index_row_wise,
+            workers=workers,
+            solver_inter=solver_inter,
         )
 
     # Case 2: LAPACK's banded solver
diff --git a/tests/templates.py b/tests/templates.py
index 2be97e1..6d195a1 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -15,7 +15,7 @@
 # === Constants ===
 
 SEED = 19_031_977
-SINGULAR_WARNING_REF_CONTENT = "singular matrix at row index"
+SINGULAR_WARNING_REF_CONTENT = "solver encountered singular matrix"
 SHAPE_MISMATCH_ERROR_REF_CONTENT = "shape mismatch between the number of equations"
 N_ROWS = [
     3,  # important edge case
@@ -129,6 +129,8 @@ def pentapy_solvers_extended_template(
     first or last diagonal element exactly zero.
     It has to be ensured that the edge case of ``n_rows = 3`` is also covered.
 
+    For ``n_rows = 3``, the error is induced by initialising a matrix of zeros.
+
     """
 
     # first, a random pentadiagonal matrix is generated
@@ -143,12 +145,9 @@ def pentapy_solvers_extended_template(
         # the induction of the error is only possible if the matrix does not have
         # only 3 rows
         if n_rows == 3:
-            pytest.skip(
-                "Only 3 rows, cannot induce error because this will not go into "
-                "PTRANS-I, but NumPy."
-            )
+            mat_full = np.zeros_like(mat_full)
 
-        if solver_alias in SOLVER_ALIASES_PTRANS_I:
+        elif solver_alias in SOLVER_ALIASES_PTRANS_I:
             mat_full[0, 0] = 0.0
         else:
             mat_full[n_rows - 1, n_rows - 1] = 0.0

From a31fe718794a53ead17357670f7186adb917ce48 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 11 Jun 2024 23:29:21 +0200
Subject: [PATCH 56/62] fix: [11] fixed broken coverage of unknown error (was
 not skipped by pragma)

---
 src/pentapy/core.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index df1d152..8607972 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -96,8 +96,10 @@ def _handle_ptrans_info_complete_fail_cases(
         raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER)
 
     # Case 3: unknown error
-    # pragma: no cover
-    raise AssertionError(perrors.PentaPyErrorMessages.UNKNOWN_ERROR)
+
+    raise AssertionError(  # pragma: no cover
+        perrors.PentaPyErrorMessages.UNKNOWN_ERROR,
+    )
 
 
 # === Auxiliary Solver Interfaces ===

From ac5759b48e5c26297c8f2e69e4163232f341ca23 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Wed, 12 Jun 2024 07:39:03 +0200
Subject: [PATCH 57/62] refactor: [11] made mu occupy the central column of the
 factorized matrix. Reason: it is the main diagonal of the matrix L for A = LU,
 just like ps is the main diagonal of U for PTRANS-II. So now, the main
 diagonals of the non-unit triangular factors are always in the central column,
 which makes the most sense.

---
 src/pentapy/solver.pyx | 50 +++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 32fdf77..58cd00f 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -262,14 +262,14 @@ cdef int _c_core_factorize_algo_1(
     They are overwriting the memoryview ``mat_factorized`` as follows:
 
     ```bash
-    [[   *          mu_0         *          al_0        be_0      ]
-     [   *          mu_1        ga_1        al_1        be_1      ]
-     [  e_2         mu_2        ga_2        al_2        be_2      ]
+    [[   *           *          mu_0        al_0        be_0      ]
+     [   *          ga_1        mu_1        al_1        be_1      ]
+     [  e_2         ga_2        mu_2        al_2        be_2      ]
                                 ...
-     [  e_i         mu_i        ga_i        al_i        be_i  ]
-     [  e_{n-3}     mu_{n-3}    ga_{n-3}    al_{n-3}    be_{n-3}  ]                                ...
-     [  e_{n-2}     mu_{n-2}    ga_{n-2}    al_{n-2}      *       ]
-     [  e_{n-1}     mu_{n-1}    ga_{n-1}      *           *       ]]
+     [  e_i         ga_i        mu_i        al_i        be_i  ]
+     [  e_{n-3}     ga_{n-3}    mu_{n-3}    al_{n-3}    be_{n-3}  ]                                ...
+     [  e_{n-2}     ga_{n-2}    mu_{n-2}    al_{n-2}      *       ]
+     [  e_{n-1}     ga_{n-1}    mu_{n-1}      *           *       ]]
     ```
 
     where the entries marked with ``*`` are not used by design, but overwritten with
@@ -304,8 +304,8 @@ cdef int _c_core_factorize_algo_1(
 
 
     mat_factorized[0] = 0.0
-    mat_factorized[1] = mu_i
-    mat_factorized[2] = 0.0
+    mat_factorized[1] = 0.0
+    mat_factorized[2] = mu_i
     mat_factorized[3] = al_i_minus_1
     mat_factorized[4] = be_i_minus_1
 
@@ -319,8 +319,8 @@ cdef int _c_core_factorize_algo_1(
     be_i = mat_flat[1] / mu_i
 
     mat_factorized[5] = 0.0
-    mat_factorized[6] = mu_i
-    mat_factorized[7] = ga_i
+    mat_factorized[6] = ga_i
+    mat_factorized[7] = mu_i
     mat_factorized[8] = al_i
     mat_factorized[9] = be_i
 
@@ -342,8 +342,8 @@ cdef int _c_core_factorize_algo_1(
         be_i = be_i_plus_1
 
         mat_factorized[fact_curr_base_idx] = e_i
-        mat_factorized[fact_curr_base_idx + 1] = mu_i
-        mat_factorized[fact_curr_base_idx + 2] = ga_i
+        mat_factorized[fact_curr_base_idx + 1] = ga_i
+        mat_factorized[fact_curr_base_idx + 2] = mu_i
         mat_factorized[fact_curr_base_idx + 3] = al_i
         mat_factorized[fact_curr_base_idx + 4] = be_i
 
@@ -359,8 +359,8 @@ cdef int _c_core_factorize_algo_1(
     al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i
 
     mat_factorized[fact_curr_base_idx] = e_i
-    mat_factorized[fact_curr_base_idx + 1] = mu_i
-    mat_factorized[fact_curr_base_idx + 2] = ga_i
+    mat_factorized[fact_curr_base_idx + 1] = ga_i
+    mat_factorized[fact_curr_base_idx + 2] = mu_i
     mat_factorized[fact_curr_base_idx + 3] = al_i_plus_1
     mat_factorized[fact_curr_base_idx + 4] = 0.0
 
@@ -372,8 +372,8 @@ cdef int _c_core_factorize_algo_1(
         return mat_n_cols
 
     mat_factorized[fact_curr_base_idx + 5] = e_i
-    mat_factorized[fact_curr_base_idx + 6] = mu_i
-    mat_factorized[fact_curr_base_idx + 7] = ga_i
+    mat_factorized[fact_curr_base_idx + 6] = ga_i
+    mat_factorized[fact_curr_base_idx + 7] = mu_i
     mat_factorized[fact_curr_base_idx + 8] = 0.0
     mat_factorized[fact_curr_base_idx + 9] = 0.0
 
@@ -405,11 +405,11 @@ cdef int _c_core_factorize_solve_algo_1(
     # first, the right-hand side is transformed into the vector ``zeta``
     # First row
 
-    ze_i_minus_1 = rhs_single[0] / mat_factorized[1]
+    ze_i_minus_1 = rhs_single[0] / mat_factorized[2]
     result_view[0] = ze_i_minus_1
 
     # Second row
-    ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[7]) / mat_factorized[6]
+    ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[6]) / mat_factorized[7]
     result_view[rhs_n_cols] = ze_i
 
     # Central rows
@@ -420,8 +420,8 @@ cdef int _c_core_factorize_solve_algo_1(
         ze_i_plus_1 = (
             rhs_single[res_curr_base_idx]
             - ze_i_minus_1 * mat_factorized[fact_curr_base_idx]
-            - ze_i * mat_factorized[fact_curr_base_idx + 2]
-        ) / mat_factorized[fact_curr_base_idx + 1]
+            - ze_i * mat_factorized[fact_curr_base_idx + 1]
+        ) / mat_factorized[fact_curr_base_idx + 2]
         ze_i_minus_1 = ze_i
         ze_i = ze_i_plus_1
         result_view[res_curr_base_idx] = ze_i_plus_1
@@ -433,8 +433,8 @@ cdef int _c_core_factorize_solve_algo_1(
     ze_i_plus_1 = (
         rhs_single[res_curr_base_idx]
         - ze_i_minus_1 * mat_factorized[fact_curr_base_idx]
-        - ze_i * mat_factorized[fact_curr_base_idx + 2]
-    ) / mat_factorized[fact_curr_base_idx + 1]
+        - ze_i * mat_factorized[fact_curr_base_idx + 1]
+    ) / mat_factorized[fact_curr_base_idx + 2]
     ze_i_minus_1 = ze_i
     ze_i = ze_i_plus_1
     result_view[res_curr_base_idx] = ze_i_plus_1
@@ -443,8 +443,8 @@ cdef int _c_core_factorize_solve_algo_1(
     ze_i_plus_1 = (
         rhs_single[res_curr_base_idx + rhs_n_cols]
         - ze_i_minus_1 * mat_factorized[fact_curr_base_idx + 5]
-        - ze_i * mat_factorized[fact_curr_base_idx + 7]
-    ) / mat_factorized[fact_curr_base_idx + 6]
+        - ze_i * mat_factorized[fact_curr_base_idx + 6]
+    ) / mat_factorized[fact_curr_base_idx + 7]
     result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1
 
     # --- Backward substitution ---

From 0945501405321a73f003f7406aee44d4afb328d9 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Sun, 21 Jul 2024 23:27:40 +0200
Subject: [PATCH 58/62] fix: - avoided performance pitfall of `except * nogil`
 in Cython implementation of the solvers - replaced this statement by
 `noexcept nogil`

---
 src/pentapy/solver.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 58cd00f..37328bc 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -386,7 +386,7 @@ cdef int _c_core_factorize_solve_algo_1(
     double* rhs_single,
     int64_t rhs_n_cols,
     double* result_view,
-) except * nogil:
+) noexcept nogil:
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the factorized
     unit upper triangular matrix ``U`` and the right-hand side ``b``.
@@ -661,7 +661,7 @@ cdef int _c_core_factorize_solve_algo_2(
     double* rhs_single,
     int64_t rhs_n_cols,
     double* result_view,
-) except * nogil:
+) noexcept nogil:
 
     """
     Solves the pentadiagonal system of equations ``Ax = b`` with the factorized

From d1433f949775a57ef3cf4a426f32dd2cd99d76a0 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 23 Jul 2024 19:35:51 +0200
Subject: [PATCH 59/62] refactor: - moved `pytest.ini` to `pyproject.toml` -
 hard-coded dependencies in `pyproject.toml` again and removed the dynamic
 link

---
 pyproject.toml         | 38 ++++++++++++++++++++++++++++++++++----
 pytest.ini             |  2 --
 requirements/all.txt   |  2 --
 requirements/base.txt  |  2 --
 requirements/check.txt |  4 ----
 requirements/doc.txt   |  8 --------
 requirements/scipy.txt |  1 -
 requirements/test.txt  |  4 ----
 requirements/umfpack   |  1 -
 9 files changed, 34 insertions(+), 28 deletions(-)
 delete mode 100644 pytest.ini
 delete mode 100644 requirements/all.txt
 delete mode 100644 requirements/base.txt
 delete mode 100644 requirements/check.txt
 delete mode 100644 requirements/doc.txt
 delete mode 100644 requirements/scipy.txt
 delete mode 100644 requirements/test.txt
 delete mode 100644 requirements/umfpack

diff --git a/pyproject.toml b/pyproject.toml
index b26ae54..b12bf0f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ name = "pentapy"
 authors = [{name = "Sebastian Müller", email = "info@geostat-framework.org"}]
 readme = "README.md"
 license = {text = "MIT"}
-dynamic = ["version", "dependencies", "optional-dependencies"]
+dynamic = ["version"]
 description = "pentapy: A toolbox for pentadiagonal matrizes."
 classifiers = [
     "Development Status :: 5 - Production/Stable",
@@ -35,10 +35,37 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Topic :: Utilities",
 ]
+dependencies = [
+    "numpy>=1.20.0",
+    "psutil>=5.8.0",
+]
 
-[tool.setuptools.dynamic]
-dependencies = {file = ["requirements/base.txt"]}
-optional-dependencies = {scipy = {file = ["requirements/scipy.txt"]}, umfpack = {file = ["requirements/umfpack.txt"]}, all = {file = ["requirements/all.txt"]}, doc = {file = ["requirements/doc.txt"]}, test = {file = ["requirements/test.txt"]}, check = {file = ["requirements/check.txt"]}}
+[project.optional-dependencies]
+scipy = ["scipy"]
+umfpack = ["scikit-umfpack"]
+all = ["scipy", "scikit-umfpack"]
+doc = [
+    "m2r2>=0.2.8",
+    "scipy>=1.1.0",
+    "matplotlib>=3",
+    "perfplot<0.9",
+    "numpydoc>=1.1",
+    "sphinx>=7",
+    "sphinx-gallery>=0.8",
+    "sphinx-rtd-theme>=2",
+]
+test = [
+    "pytest>=8",
+    "pytest-cov>=3",
+    "pytest-xdist>=3",
+    "scipy>=1.1.0",
+]
+check = [
+    "black>=24,<25",
+    "isort[colors]",
+    "pylint",
+    "cython-lint",
+]
 
 [project.urls]
 Homepage = "https://github.com/GeoStat-Framework/pentapy"
@@ -116,6 +143,9 @@ max-line-length = 120
     max-attributes = 25
     max-public-methods = 75
 
+[tool.pytest.ini_options]
+addopts = "--doctest-modules"
+
 [tool.cibuildwheel]
 # Switch to using build
 build-frontend = "build"
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 2bed0f3..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-[pytest]
-addopts = --doctest-modules
\ No newline at end of file
diff --git a/requirements/all.txt b/requirements/all.txt
deleted file mode 100644
index be8d325..0000000
--- a/requirements/all.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-scikit-umfpack
-scipy
\ No newline at end of file
diff --git a/requirements/base.txt b/requirements/base.txt
deleted file mode 100644
index 0e77631..0000000
--- a/requirements/base.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy>=1.20.0
-psutil>=5.8.0
\ No newline at end of file
diff --git a/requirements/check.txt b/requirements/check.txt
deleted file mode 100644
index 4af46fc..0000000
--- a/requirements/check.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-black>=24,<25
-isort[colors]
-pylint
-cython-lint
\ No newline at end of file
diff --git a/requirements/doc.txt b/requirements/doc.txt
deleted file mode 100644
index c49be85..0000000
--- a/requirements/doc.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-m2r2>=0.2.8
-scipy>=1.1.0
-matplotlib>=3
-perfplot<0.9
-numpydoc>=1.1
-sphinx>=7
-sphinx-gallery>=0.8
-sphinx-rtd-theme>=2
\ No newline at end of file
diff --git a/requirements/scipy.txt b/requirements/scipy.txt
deleted file mode 100644
index 9c61c73..0000000
--- a/requirements/scipy.txt
+++ /dev/null
@@ -1 +0,0 @@
-scipy
\ No newline at end of file
diff --git a/requirements/test.txt b/requirements/test.txt
deleted file mode 100644
index 2f8c0c7..0000000
--- a/requirements/test.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-pytest>=8
-pytest-cov>=3
-pytest-xdist>=3
-scipy>=1.1.0
\ No newline at end of file
diff --git a/requirements/umfpack b/requirements/umfpack
deleted file mode 100644
index a8630c1..0000000
--- a/requirements/umfpack
+++ /dev/null
@@ -1 +0,0 @@
-scikit-umfpack
\ No newline at end of file

From edb4f337defb7437ace62d8c8d15c699a061d758 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 23 Jul 2024 21:13:16 +0200
Subject: [PATCH 60/62] refactor: - made OpenMP-link in `setup.py` optional
 based on an environment variable - changed thread number evaluation to be
 OpenMP-based (if at all) - dropped `psutil` dependency - renamed `workers` to
 `num_threads` and made `None` a possible option and the default - adapted
 tests to the new `num_threads` - updated documentation - adapted to `pylint`
 comments

---
 CHANGELOG.md                            |  51 ++++-----
 README.md                               |   1 -
 docs/source/index.rst                   |   1 -
 pyproject.toml                          |   2 +-
 setup.py                                |  44 ++++++--
 src/pentapy/core.py                     | 141 +++++++++---------------
 src/pentapy/errors.py                   |   4 -
 src/pentapy/solver.pxd                  |   4 +-
 src/pentapy/solver.pyx                  |  55 ++++++---
 tests/templates.py                      |  12 +-
 tests/test_solvers_internal_parallel.py |  37 +++----
 tests/test_solvers_internal_serial.py   |   8 +-
 12 files changed, 177 insertions(+), 183 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5891ee6..a4be735 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,141 +8,138 @@ See [#27](https://github.com/GeoStat-Framework/pentapy/pull/27)
 
 ### Breaking Changes
 
-- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support that can now be enabled via the new ``workers`` parameter in ``pentapy.solve`` (default: 1)
-- fully typed the ``pentapy.tools`` module
+- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides, which can now be controlled via the new `num_threads` parameter in `pentapy.solve` (default: `None`, i.e., the number of threads is determined automatically)
 - updated the **Cython low level interfaces** to PTRANS-I and PTRANS-II to **only accept C-contiguous arrays** (not backwards compatible)
 
-## [1.4.0] - 2024-06
-
-See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26)
-
 ### Enhancements
 
-- added support for multiple right-hand sides (currently serial)
 - improved error handling and added debug information to error messages
 
 ### Changes
 
-- shotgun refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support
-- fully typed the function ``pentapy.solve``
-- made internal solver alias handling of ``pentapy.solve`` smarter, more robust, and removed all duplicate code
+- refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support
+- fully typed the `pentapy.tools` module and the function `pentapy.solve`
+- made internal solver alias handling of `pentapy.solve` smarter, more robust, and removed all duplicate code
 - gave all solvers a consistent interface
-- made code in ``pentapy.core`` more human-readable and maintainable and added comments
+- made code in `pentapy.core` more maintainable
 - fixed typos in documentation
 
 ### Bugfixes
 
 - fixed error handling in case of zero-division to trigger dead error handling branch (see [Issue 23](https://github.com/GeoStat-Framework/pentapy/issues/23))
-- fixed edge case error for row/column of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24))
+- fixed edge case error for row/column count of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24))
 
 ### Tests
 
-- transitioned from ``unittest``-based testing to fully ``pytest``-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25))
+- transitioned from `unittest`-based testing to fully `pytest`-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25))
 - made actual tests more meaningful by comparing them to LAPACK as reference standard (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25))
-- included external solver bindings accessible via ``pentapy.solve`` as part of the test suite
+- included external solver bindings accessible via `pentapy.solve` as part of the test suite
 - increased true coverage (not line-hit coverage) close to 100%
 
-### Packaging
-
-- made dependency specification file-based and dynamic
-
 ## [1.3.0] - 2024-04
 
 See [#21](https://github.com/GeoStat-Framework/pentapy/pull/21)
 
 ### Enhancements
+
 - added support for python 3.12
 - added support for numpy 2
 - build extensions with numpy 2 and cython 3
 
 ### Changes
+
 - dropped python 3.7 support
 - dropped 32bit builds
 - linted cython files
 - increase maximal line length to 88 (black default)
 
-
 ## [1.2.0] - 2023-04
 
 See [#19](https://github.com/GeoStat-Framework/pentapy/pull/19)
 
 ### Enhancements
+
 - added support for python 3.10 and 3.11
 - add wheels for arm64 systems
 - created `solver.pxd` file to be able to cimport the solver module
 - added a `CITATION.bib` file
 
 ### Changes
+
 - move to `src/` based package structure
 - dropped python 3.6 support
 - move meta-data to pyproject.toml
 - simplified documentation
 
 ### Bugfixes
+
 - determine correct version when installing from archive
 
 ## [1.1.2] - 2021-07
 
 ### Changes
+
 - new package structure with `pyproject.toml` ([#15](https://github.com/GeoStat-Framework/pentapy/pull/15))
 - Sphinx-Gallery for Examples
 - Repository restructuring: use a single `main` branch
 - use `np.asarray` in `solve` to speed up computation ([#17](https://github.com/GeoStat-Framework/pentapy/pull/17))
 
-
 ## [1.1.1] - 2021-02
 
 ### Enhancements
+
 - Python 3.9 support
 
 ### Changes
-- GitHub Actions for CI
 
+- GitHub Actions for CI
 
 ## [1.1.0] - 2020-03-22
 
 ### Enhancements
+
 - Python 3.8 support
 
 ### Changes
+
 - python only builds are no longer available
 - Python 2.7 and 3.4 support dropped
 
-
 ## [1.0.3] - 2019-11-10
 
 ### Enhancements
+
 - the algorithms `PTRANS-I` and `PTRANS-II` now raise a warning when they can not solve the given system
 - there are now switches to install scipy and umf solvers as extra requirements
 
 ### Bugfixes
-- multiple minor bugfixes
 
+- multiple minor bugfixes
 
 ## [1.0.0] - 2019-09-18
 
 ### Enhancements
-- the second algorithm `PTRANS-II` from *Askar et al. 2015* is now implemented and can be used by `solver=2`
+
+- the second algorithm `PTRANS-II` from _Askar et al. 2015_ is now implemented and can be used by `solver=2`
 - the package is now tested and a coverage is calculated
 - there are now pre-built binaries for Python 3.7
 - the documentation is now available under https://geostat-framework.readthedocs.io/projects/pentapy
 
 ### Changes
-- pentapy is now licensed under the MIT license
 
+- pentapy is now licensed under the MIT license
 
 ## [0.1.1] - 2019-03-08
 
 ### Bugfixes
-- MANIFEST.in was missing in the 0.1.0 version
 
+- MANIFEST.in was missing in the 0.1.0 version
 
 ## [0.1.0] - 2019-03-07
 
 This is the first release of pentapy, a python toolbox for solving pentadiagonal linear equation systems.
 The solver is implemented in cython, which makes it really fast.
 
-
 [2.0.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.4.0...v2.0.0
 [1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0
 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0
diff --git a/README.md b/README.md
index 33e7237..068ab5b 100644
--- a/README.md
+++ b/README.md
@@ -108,7 +108,6 @@ Have a look at the script: [``examples/03_perform_simple.py``](https://github.co
 ## Requirements:
 
 - [NumPy >= 1.20.0](https://www.numpy.org)
-- [psutil >= 5.8.0](https://psutil.readthedocs.io/en/latest/) (for parallelisation)
 
 ### Optional
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 83ae30f..93d9a5d 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -95,7 +95,6 @@ Requirements
 ============
 
 - `Numpy >= 1.20.0 <http://www.numpy.org>`_
-- `psutil >= 5.8.0 <https://psutil.readthedocs.io/en/latest/>`_
 
 Optional
 --------
diff --git a/pyproject.toml b/pyproject.toml
index b12bf0f..57bed29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,7 @@ requires = [
     "setuptools_scm>=7",
     "numpy>=2.0.0rc1,<2.3; python_version >= '3.9'",
     "oldest-supported-numpy; python_version < '3.9'",
+    "extension-helpers>=1",
     "Cython>=3.0.10,<3.1.0",
 ]
 build-backend = "setuptools.build_meta"
@@ -37,7 +38,6 @@ classifiers = [
 ]
 dependencies = [
     "numpy>=1.20.0",
-    "psutil>=5.8.0",
 ]
 
 [project.optional-dependencies]
diff --git a/setup.py b/setup.py
index 8d7421d..0c23078 100644
--- a/setup.py
+++ b/setup.py
@@ -1,19 +1,24 @@
 """pentapy: A toolbox for pentadiagonal matrices."""
 
+# === Imports ===
+
 import os
-import sys
 
-import Cython.Compiler.Options
 import numpy as np
 from Cython.Build import cythonize
+from extension_helpers import add_openmp_flags_if_available
 from setuptools import Extension, setup
 
-if sys.platform.startswith("win"):
-    openmp_arg = "/openmp"
-else:
-    openmp_arg = "-fopenmp"
+# === Constants ===
 
-Cython.Compiler.Options.annotate = True
+# the environment variable key for the build of the serial/parallel version
+PENTAPY_BUILD_PARALLEL = "PENTAPY_BUILD_PARALLEL"
+# the name of the Cython compile-time variable that flags whether OpenMP is linked
+OPENMP = "OPENMP"
+# the number of threads for the Cython build
+CYTHON_BUILD_NUM_THREADS = 1
+
+# === Setup ===
 
 # cython extensions
 CY_MODULES = [
@@ -22,13 +27,32 @@
         sources=[os.path.join("src", "pentapy", "solver.pyx")],
         include_dirs=[np.get_include()],
         define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
-        extra_compile_args=[openmp_arg],
-        extra_link_args=[openmp_arg],
     )
 ]
 
+# the OpenMP link is added if available/requested
+# the environment variable PENTAPY_BUILD_PARALLEL can be unset or 0 (builds the serial
+# version) or a non-zero integer (builds the parallel version if OpenMP is available)
+with_open_mp = False
+if int(os.environ.get(PENTAPY_BUILD_PARALLEL, "0")):
+    openmp_added = [add_openmp_flags_if_available(mod) for mod in CY_MODULES]
+    with_open_mp = any(openmp_added)
+    if with_open_mp:
+        open_mp_str = "linking OpenMP (parallel version)"
+    else:
+        open_mp_str = "not linking OpenMP (serial version)"
+
+    print(f"PENTAPY SETUP - {open_mp_str}")
+
+else:
+    print("PENTAPY SETUP - OpenMP not requested (serial version)")
+
 setup(
-    ext_modules=cythonize(CY_MODULES, nthreads=1, annotate=True),
+    ext_modules=cythonize(
+        CY_MODULES,
+        nthreads=CYTHON_BUILD_NUM_THREADS,
+        compile_time_env={OPENMP: with_open_mp},
+    ),
     package_data={"pentapy": ["*.pxd"]},  # include pxd files
     include_package_data=False,  # ignore other files
     zip_safe=False,
diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 8607972..4126a9f 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -5,56 +5,40 @@
 # === Imports ===
 
 import warnings
-from typing import Literal
+from typing import Literal, Optional
 
 import numpy as np
-import psutil
 
 from pentapy import _models as pmodels
 from pentapy import errors as perrors
 from pentapy import solver as psolver  # type: ignore
 from pentapy import tools as ptools
 
-# === Auxiliary functions ===
-
-
-def _get_num_workers(workers: int) -> int:
-    """
-    Gets the number of available workers for the solver.
+# === Types ===
+
+SolverAliases = Literal[
+    1,
+    "1",
+    "PTRANS-I",
+    "ptrans-i",
+    2,
+    "2",
+    "PTRANS-II",
+    "ptrans-ii",
+    3,
+    "3",
+    "lapack",
+    4,
+    "4",
+    "spsolve",
+    5,
+    "5",
+    "spsolve_umf",
+    "umf",
+    "umf_pack",
+]
 
-    Parameters
-    ----------
-    workers : :class:`int`
-        Number of workers requested.
-
-    Returns
-    -------
-    workers : :class:`int`
-        Number of workers available.
-
-    """
-
-    if workers < -1:
-        raise ValueError(
-            perrors.PentaPyErrorMessages.WRONG_WORKERS.format(workers=workers)
-        )
-
-    if workers == -1:
-        # NOTE: the following will be overwritten by the number of available threads
-        workers = 999_999_999_999_999_999_999_999_999
-
-    # the number of workers is limited between 1 and the number of available threads
-    # NOTE: the following does not count the number of total threads, but the number of
-    #       threads available for the solver
-    proc = psutil.Process()
-    workers = min(
-        workers,
-        len(proc.cpu_affinity()),  # type: ignore
-    )
-    workers = max(workers, 1)
-    del proc
-
-    return workers
+# === Auxiliary functions ===
 
 
 def _raise_ptrans_or_numpy_shape_mismatch_error(
@@ -134,12 +118,12 @@ def _solve_with_numpy(
         return np.full(shape=rhs.shape, fill_value=np.nan)
 
 
-def _solve_with_ptrans(
+def _solve_with_ptrans(  # pylint: disable=R1710
     mat: np.ndarray,
     rhs: np.ndarray,
     is_flat: bool,
     index_row_wise: bool,
-    workers: int,
+    num_threads: Optional[int],
     solver_inter: pmodels.PentaSolverAliases,
 ) -> np.ndarray:  # type: ignore
     """
@@ -150,10 +134,10 @@ def _solve_with_ptrans(
     # the matrix is checked and shifted if necessary ...
     if is_flat and index_row_wise:
         mat_flat = np.asarray(mat, dtype=np.double)
-        ptools._check_penta(mat_flat)
+        ptools._check_penta(mat_flat)  # pylint: disable=W0212
     elif is_flat:
         mat_flat = np.array(mat, dtype=np.double)  # NOTE: this is a copy
-        ptools._check_penta(mat_flat)
+        ptools._check_penta(mat_flat)  # pylint: disable=W0212
         ptools.shift_banded(mat_flat, copy=False)
     else:
         mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double)
@@ -167,9 +151,6 @@ def _solve_with_ptrans(
     if mat_flat.shape[1] == 3:
         return _solve_with_numpy(mat_flat=mat_flat, rhs=rhs)
 
-    # now, the number of workers for multithreading has to be determined if necessary
-    workers = _get_num_workers(workers)
-
     # if there is only a single right-hand side, it has to be reshaped to a 2D array
     # NOTE: this has to be reverted at the end
     single_rhs = rhs.ndim == 1
@@ -179,16 +160,16 @@ def _solve_with_ptrans(
 
     # the respective solver is chosen ...
     solver_func = (
-        psolver.penta_solver1
+        psolver.penta_solver1  # pylint: disable=I1101
         if solver_inter == pmodels.PentaSolverAliases.PTRANS_I
-        else psolver.penta_solver2
+        else psolver.penta_solver2  # pylint: disable=I1101
     )
 
     # ... and the solver is called
     sol, info = solver_func(
         np.ascontiguousarray(mat_flat),
         np.ascontiguousarray(rhs),
-        workers,
+        num_threads,
     )
 
     # in case of success, the solution can be returned (reshaped if necessary)
@@ -199,7 +180,7 @@ def _solve_with_ptrans(
         return sol
 
     # in case of a singular matrix, a warning will be issued and NaNs will be returned
-    elif info > pmodels.Infos.SUCCESS:
+    if info > pmodels.Infos.SUCCESS:
         warnings.warn(
             perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format(
                 solver_inter_name=pmodels.PentaSolverAliases.PTRANS_I.name,
@@ -225,28 +206,8 @@ def solve(
     rhs: np.ndarray,
     is_flat: bool = False,
     index_row_wise: bool = True,
-    solver: Literal[
-        1,
-        "1",
-        "PTRANS-I",
-        "ptrans-i",
-        2,
-        "2",
-        "PTRANS-II",
-        "ptrans-ii",
-        3,
-        "3",
-        "lapack",
-        4,
-        "4",
-        "spsolve",
-        5,
-        "5",
-        "spsolve_umf",
-        "umf",
-        "umf_pack",
-    ] = 1,
-    workers: int = 1,
+    solver: SolverAliases = 1,
+    num_threads: Optional[int] = None,
 ) -> np.ndarray:
     """
     Solver for a pentadiagonal system.
@@ -296,11 +257,11 @@ def solve(
             * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)`
 
         Strings are not case-sensitive.
-    workers : :class:`int`, optional
-        Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel
+    num_threads : :class:`int` or ``None``, optional
+        Number of num_threads used in the PTRANS-I and PTRANS-II solvers for parallel
         processing of multiple right-hand sides. Parallelisation overhead can be
-        significant for small systems. If set to ``-1``, the number of workers is
-        automatically determined. Default: ``1``
+        significant for small systems. If set to a negative value or ``None``, the
+        number of threads is automatically determined. Default: ``None``
 
     Returns
     -------
@@ -309,16 +270,16 @@ def solve(
 
     Raises
     ------
-    ValueError
-        If the number of workers is incorrect.
     ValueError
         If there is a shape mismatch between the number of equations in the left-hand
         side matrix and the number of right-hand sides.
 
-    """
+    """  # pylint: disable=C0301
 
     # first, the solver is converted to the internal name to avoid confusion
-    solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()]
+    solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[  # pylint: disable=W0212
+        str(solver).lower()
+    ]
 
     # Case 1: the pentapy solvers
     if solver_inter in {
@@ -331,12 +292,12 @@ def solve(
             rhs=rhs,
             is_flat=is_flat,
             index_row_wise=index_row_wise,
-            workers=workers,
+            num_threads=num_threads,
             solver_inter=solver_inter,
         )
 
     # Case 2: LAPACK's banded solver
-    elif solver_inter == pmodels.PentaSolverAliases.LAPACK:
+    if solver_inter == pmodels.PentaSolverAliases.LAPACK:
         try:
             from scipy.linalg import solve_banded
         except ImportError as imp_err:  # pragma: no cover
@@ -345,7 +306,7 @@ def solve(
 
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)  # NOTE: this is a copy
-            ptools._check_penta(mat_flat)
+            ptools._check_penta(mat_flat)  # pylint: disable=W0212
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
             mat_flat = np.asarray(mat)
@@ -367,7 +328,7 @@ def solve(
             return np.full(shape=rhs.shape, fill_value=np.nan)
 
     # Case 3: SciPy's sparse solver with or without UMFPACK
-    elif solver_inter in {
+    if solver_inter in {
         pmodels.PentaSolverAliases.SUPER_LU,
         pmodels.PentaSolverAliases.UMFPACK,
     }:
@@ -380,7 +341,7 @@ def solve(
 
         if is_flat and index_row_wise:
             mat_flat = np.array(mat)  # NOTE: this is a copy
-            ptools._check_penta(mat_flat)
+            ptools._check_penta(mat_flat)  # pylint: disable=W0212
             ptools.shift_banded(mat_flat, col_to_row=False, copy=False)
         elif is_flat:
             mat_flat = np.asarray(mat)
@@ -412,6 +373,6 @@ def solve(
 
         return sol
 
-    else:  # pragma: no cover
-        msg = f"pentapy.solve: unknown solver ({solver})"
-        raise ValueError(msg)
+    # Case 4: unknown solver
+    msg = f"pentapy.solve: unknown solver ({solver})"  # pragma: no cover
+    raise ValueError(msg)  # pragma: no cover
diff --git a/src/pentapy/errors.py b/src/pentapy/errors.py
index 0974816..8479cd3 100644
--- a/src/pentapy/errors.py
+++ b/src/pentapy/errors.py
@@ -12,7 +12,6 @@ class PentaPyErrorMessages(str, Enum):
     """
     Defines the possible error messages for the pentapy package, namely
 
-    - ``WRONG_WORKERS``: the number of workers is incorrect
     - ``SINGULAR_MATRIX``: the matrix is singular
     - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect
     - ``WRONG_SOLVER``: the solver alias is incorrect on C-level (internal error,
@@ -21,9 +20,6 @@ class PentaPyErrorMessages(str, Enum):
 
     """
 
-    WRONG_WORKERS = (
-        "pentapy.solve: workers has to be -1 or greater, but got workers={workers}"
-    )
     SINGULAR_MATRIX = (
         "pentapy: {solver_inter_name} solver encountered singular matrix at "
         "row index {row_idx}. Returning NaNs."
diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index 879dcf6..901f1a2 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -2,13 +2,13 @@
 cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    int num_threads,
     int* info,
 )
 
 cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    int num_threads,
     int* info,
 )
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index 37328bc..b2fc737 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -11,11 +11,38 @@ implemented in Cython.
 import numpy as np
 
 cimport numpy as np
-
 from cython cimport view
+
 from cython.parallel import prange
+
 from libc.stdint cimport int64_t
 
+# NOTE: OPENMP is set during setup
+if OPENMP:
+    cimport openmp
+
+# === Optional Setup of OpenMP ===
+
+
+def get_c_num_threads(num_threads):
+    # if the thread number was specified explicitly, it needs to be sanitized
+    if num_threads is not None:
+        if num_threads >= 0:
+            return max(1, num_threads)
+
+        elif OPENMP:  # negative numbers result in maximum thread number with OPENMP
+            return openmp.omp_get_max_threads()
+
+        # without OpenMP, the number of threads is set to 1 in the final return
+
+    # if None, the maximum thread number is retrieved with OPENMP if available
+    # NOTE: OPENMP is set during setup
+    if OPENMP:
+        return openmp.omp_get_num_procs()
+
+    # if no threads were set so far, the number of threads is set to 1 (serial mode)
+    return 1
+
 
 # === Constants ===
 
@@ -36,18 +63,19 @@ cdef enum Infos:
 def penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    num_threads=None,
 ):
 
     # NOTE: info is defined to be overwritten for possible future validations
     cdef int info
 
+    num_threads_c = get_c_num_threads(num_threads)
     return (
         np.asarray(
             c_penta_solver1(
                 mat_flat,
                 rhs,
-                workers,
+                num_threads_c,
                 &info,
             )
         ),
@@ -58,18 +86,19 @@ def penta_solver1(
 def penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    num_threads=None,
 ):
 
     # NOTE: info is defined to be overwritten for possible future validations
     cdef int info
 
+    num_threads_c = get_c_num_threads(num_threads)
     return (
         np.asarray(
             c_penta_solver2(
                 mat_flat,
                 rhs,
-                workers,
+                num_threads_c,
                 &info,
             )
         ),
@@ -82,7 +111,7 @@ def penta_solver2(
 cdef double[::, ::1] c_penta_solver1(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    int num_threads,
     int* info,
 ):
     """
@@ -121,13 +150,12 @@ cdef double[::, ::1] c_penta_solver1(
     return _c_interf_factorize_solve(
         mat_factorized,
         rhs,
-        workers,
+        num_threads,
         info,
         Solvers.PTRRANS_1,
     )
 
 
-
 cdef double[::, ::1] _c_interf_factorize(
     double[::, ::1] mat_flat,
     int* info,
@@ -179,7 +207,7 @@ cdef double[::, ::1] _c_interf_factorize(
 cdef double[::, ::1] _c_interf_factorize_solve(
     double[::, ::1] mat_factorized,
     double[::, ::1] rhs,
-    int workers,
+    int num_threads,
     int* info,
     int solver,
 ):
@@ -210,7 +238,7 @@ cdef double[::, ::1] _c_interf_factorize_solve(
         for iter_col in prange(
             rhs_n_cols,
             nogil=True,
-            num_threads=workers,
+            num_threads=num_threads,
         ):
             info[0] = _c_core_factorize_solve_algo_1(
                 mat_n_cols,
@@ -227,7 +255,7 @@ cdef double[::, ::1] _c_interf_factorize_solve(
         for iter_col in prange(
             rhs_n_cols,
             nogil=True,
-            num_threads=workers,
+            num_threads=num_threads,
         ):
             info[0] = _c_core_factorize_solve_algo_2(
                 mat_n_cols,
@@ -302,7 +330,6 @@ cdef int _c_core_factorize_algo_1(
     al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i
     be_i_minus_1 = mat_flat[0] / mu_i
 
-
     mat_factorized[0] = 0.0
     mat_factorized[1] = 0.0
     mat_factorized[2] = mu_i
@@ -473,7 +500,7 @@ cdef int _c_core_factorize_solve_algo_1(
 cdef double[::, ::1] c_penta_solver2(
     double[::, ::1] mat_flat,
     double[::, ::1] rhs,
-    int workers,
+    int num_threads,
     int* info,
 ):
     """
@@ -512,7 +539,7 @@ cdef double[::, ::1] c_penta_solver2(
     return _c_interf_factorize_solve(
         mat_factorized,
         rhs,
-        workers,
+        num_threads,
         info,
         Solvers.PTRRANS_2,
     )
diff --git a/tests/templates.py b/tests/templates.py
index 6d195a1..554b44f 100644
--- a/tests/templates.py
+++ b/tests/templates.py
@@ -40,7 +40,7 @@
     "solver_alias": SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II,
     "induce_error": [False, True],
     "from_order": ["C", "F"],
-    "workers": [1],
+    "num_threads": [1],
 }
 
 # === Auxiliary functions ===
@@ -121,7 +121,7 @@ def pentapy_solvers_extended_template(
     ],
     induce_error: bool,
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
     """
     Tests the pentadiagonal solvers when starting from different input layouts, number
@@ -178,7 +178,7 @@ def pentapy_solvers_extended_template(
                 mat=mat,
                 rhs=rhs,
                 solver=solver_alias,  # type: ignore
-                workers=workers,
+                num_threads=num_threads,
                 **kwargs,
             )
 
@@ -194,7 +194,7 @@ def pentapy_solvers_extended_template(
         mat=mat,
         rhs=rhs,
         solver=solver_alias,  # type: ignore
-        workers=workers,
+        num_threads=num_threads,
         **kwargs,
     )
     assert sol.shape == result_shape
@@ -227,7 +227,7 @@ def pentapy_solvers_shape_mismatch_template(
         "pTrAnS-Ii",
     ],
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
     """
     Tests the pentadiagonal solvers when the shape of the right-hand side is incorrect,
@@ -264,7 +264,7 @@ def pentapy_solvers_shape_mismatch_template(
             mat=mat,
             rhs=rhs,
             solver=solver_alias,  # type: ignore
-            workers=workers,
+            num_threads=num_threads,
             **kwargs,
         )
 
diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py
index 3775ffa..b6a3b4e 100644
--- a/tests/test_solvers_internal_parallel.py
+++ b/tests/test_solvers_internal_parallel.py
@@ -9,7 +9,7 @@
 # === Imports ===
 
 from copy import deepcopy
-from typing import Literal, Optional, Type
+from typing import Literal, Optional
 
 import pytest
 import templates
@@ -20,7 +20,7 @@
 # based on either Algorithm PTRANS-I or PTRANS-II in parallel mode
 param_dict = deepcopy(templates.PARAM_DICT)
 param_dict["from_order"] = ["C"]
-param_dict["workers"] = [-1]
+param_dict["num_threads"] = [-1]
 
 # --- Extended solve test ---
 
@@ -41,7 +41,7 @@ def test_pentapy_solvers_parallel(
     ],
     induce_error: bool,
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
 
     templates.pentapy_solvers_extended_template(
@@ -51,7 +51,7 @@ def test_pentapy_solvers_parallel(
         solver_alias=solver_alias,
         induce_error=induce_error,
         from_order=from_order,
-        workers=workers,
+        num_threads=num_threads,
     )
 
 
@@ -61,17 +61,15 @@ def test_pentapy_solvers_parallel(
     )
 
 
-# --- Different workers test ---
+# --- Different number of threads test ---
 
 
-@pytest.mark.parametrize(
-    "workers, expected", [(0, None), (1, None), (-1, None), (-2, ValueError)]
-)
-def test_pentapy_solvers_parallel_different_workers(
-    workers: int, expected: Optional[Type[Exception]]
+@pytest.mark.parametrize("num_threads", [0, 1, -1, -2, None])
+def test_pentapy_solvers_parallel_different_num_threads(
+    num_threads: Optional[int],
 ) -> None:
     """
-    Tests the parallel solver with different number of workers, which might be wrong.
+    Tests that the parallel solvers run properly with different numbers of threads.
 
     """
 
@@ -82,17 +80,10 @@ def test_pentapy_solvers_parallel_different_workers(
         solver_alias=1,
         induce_error=False,
         from_order="C",
-        workers=workers,
+        num_threads=num_threads,
     )
 
-    # Case 1: the test should fail
-    if expected is not None:
-        with pytest.raises(expected):
-            templates.pentapy_solvers_extended_template(**kwargs)  # type: ignore
-
-        return
-
-    # Case 2: the test should pass
+    # NOTE: if there is no crash, the test is successful
     templates.pentapy_solvers_extended_template(**kwargs)  # type: ignore
 
 
@@ -114,7 +105,7 @@ def test_pentapy_solvers_shape_mismatch_parallel(
         "pTrAnS-Ii",
     ],
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
 
     templates.pentapy_solvers_shape_mismatch_template(
@@ -123,12 +114,12 @@ def test_pentapy_solvers_shape_mismatch_parallel(
         input_layout=input_layout,
         solver_alias=solver_alias,
         from_order=from_order,
-        workers=workers,
+        num_threads=num_threads,
     )
 
 
 params_dict_without_induce_error = deepcopy(templates.PARAM_DICT)
-params_dict_without_induce_error["workers"] = [-1]
+params_dict_without_induce_error["num_threads"] = [-1]
 params_dict_without_induce_error.pop("induce_error")
 
 for key, value in params_dict_without_induce_error.items():
diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py
index f1248f8..0957a65 100644
--- a/tests/test_solvers_internal_serial.py
+++ b/tests/test_solvers_internal_serial.py
@@ -39,7 +39,7 @@ def test_pentapy_solvers_extended_serial(
     ],
     induce_error: bool,
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
 
     templates.pentapy_solvers_extended_template(
@@ -49,7 +49,7 @@ def test_pentapy_solvers_extended_serial(
         solver_alias=solver_alias,
         induce_error=induce_error,
         from_order=from_order,
-        workers=workers,
+        num_threads=num_threads,
     )
 
 
@@ -77,7 +77,7 @@ def test_pentapy_solvers_shape_mismatch_serial(
         "pTrAnS-Ii",
     ],
     from_order: Literal["C", "F"],
-    workers: int,
+    num_threads: int,
 ) -> None:
 
     templates.pentapy_solvers_shape_mismatch_template(
@@ -86,7 +86,7 @@ def test_pentapy_solvers_shape_mismatch_serial(
         input_layout=input_layout,
         solver_alias=solver_alias,
         from_order=from_order,
-        workers=workers,
+        num_threads=num_threads,
     )
 
 

From 5b424281e68a2f287f04b64fde48cc50728c53a4 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 23 Jul 2024 21:32:36 +0200
Subject: [PATCH 61/62] doc: - fixed typo in docstring of `solve` for the
 number of threads

---
 src/pentapy/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pentapy/core.py b/src/pentapy/core.py
index 4126a9f..fabb1cc 100644
--- a/src/pentapy/core.py
+++ b/src/pentapy/core.py
@@ -258,7 +258,7 @@ def solve(
 
         Strings are not case-sensitive.
     num_threads : :class:`int` or ``None``, optional
-        Number of num_threads used in the PTRANS-I and PTRANS-II solvers for parallel
+        Number of threads used in the PTRANS-I and PTRANS-II solvers for parallel
         processing of multiple right-hand sides. Parallelisation overhead can be
         significant for small systems. If set to a negative value or ``None``, the
         number of threads is automatically determined. Default: ``None``

From 743549bbaaf88d1f36c620e784b671124102efe0 Mon Sep 17 00:00:00 2001
From: MothNik <nik.zoe@web.de>
Date: Tue, 23 Jul 2024 21:44:44 +0200
Subject: [PATCH 62/62] refactor: - made input matrix and right-hand side
 read-only on Cython-level

---
 src/pentapy/solver.pxd |  8 ++++----
 src/pentapy/solver.pyx | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd
index 901f1a2..ec7adc3 100644
--- a/src/pentapy/solver.pxd
+++ b/src/pentapy/solver.pxd
@@ -1,14 +1,14 @@
 # cython: language_level=3
 cdef double[::, ::1] c_penta_solver1(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     int num_threads,
     int* info,
 )
 
 cdef double[::, ::1] c_penta_solver2(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     int num_threads,
     int* info,
 )
diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx
index b2fc737..11db625 100644
--- a/src/pentapy/solver.pyx
+++ b/src/pentapy/solver.pyx
@@ -61,8 +61,8 @@ cdef enum Infos:
 
 
 def penta_solver1(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     num_threads=None,
 ):
 
@@ -84,8 +84,8 @@ def penta_solver1(
 
 
 def penta_solver2(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     num_threads=None,
 ):
 
@@ -109,8 +109,8 @@ def penta_solver2(
 # === Solver Algorithm 1 ===
 
 cdef double[::, ::1] c_penta_solver1(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     int num_threads,
     int* info,
 ):
@@ -157,7 +157,7 @@ cdef double[::, ::1] c_penta_solver1(
 
 
 cdef double[::, ::1] _c_interf_factorize(
-    double[::, ::1] mat_flat,
+    const double[::, ::1] mat_flat,
     int* info,
     int solver,
 ):
@@ -206,7 +206,7 @@ cdef double[::, ::1] _c_interf_factorize(
 
 cdef double[::, ::1] _c_interf_factorize_solve(
     double[::, ::1] mat_factorized,
-    double[::, ::1] rhs,
+    const double[::, ::1] rhs,
     int num_threads,
     int* info,
     int solver,
@@ -498,8 +498,8 @@ cdef int _c_core_factorize_solve_algo_1(
 
 
 cdef double[::, ::1] c_penta_solver2(
-    double[::, ::1] mat_flat,
-    double[::, ::1] rhs,
+    const double[::, ::1] mat_flat,
+    const double[::, ::1] rhs,
     int num_threads,
     int* info,
 ):