From 96f67f6bc8eceb6c6d82d63f892683524cbb7779 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 25 May 2024 17:42:11 +0200 Subject: [PATCH 01/62] pkg: made `.venv` in `.gitignore` more general --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bdd5bb4..7eb4c23 100644 --- a/.gitignore +++ b/.gitignore @@ -83,7 +83,7 @@ celerybeat-schedule .env # virtualenv -.venv +.venv* venv/ ENV/ From 3deb05ca115a11e1d7ad419a9e3b7223a6ea680d Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 7 Jun 2024 21:40:00 +0200 Subject: [PATCH 02/62] wip: [11] refactored algorithm 1 to handle multiple right-hand sides --- src/pentapy/core.py | 7 ++ src/pentapy/solver.pxd | 2 +- src/pentapy/solver.pyx | 272 ++++++++++++++++++++++++++++++++++------- 3 files changed, 237 insertions(+), 44 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 067189d..9a55780 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -77,7 +77,14 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): else: mat_flat = create_banded(mat, col_wise=False, dtype=np.double) rhs = np.asarray(rhs, dtype=np.double) + single_rhs = rhs.ndim == 1 + if single_rhs: + rhs = rhs[:, np.newaxis] + try: + if single_rhs: + return penta_solver1(mat_flat, rhs).ravel() + return penta_solver1(mat_flat, rhs) except ZeroDivisionError: warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index e7c471e..05d249f 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -1,4 +1,4 @@ # cython: language_level=3 -cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs) +cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs) cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 469b074..c59226b 100644 --- a/src/pentapy/solver.pyx +++ 
b/src/pentapy/solver.pyx @@ -1,14 +1,23 @@ -# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True +# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=True + """ This is a solver linear equation systems with a penta-diagonal matrix, -implemented in cython. +implemented in Cython. + """ + +### Imports ### + import numpy as np cimport numpy as np +from libc.stdint cimport int64_t, uint64_t -def penta_solver1(double[:, :] mat_flat, double[:] rhs): +### Main Python Interface ### + + +def penta_solver1(double[:, :] mat_flat, double[:, :] rhs): return np.asarray(c_penta_solver1(mat_flat, rhs)) @@ -16,56 +25,233 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs): return np.asarray(c_penta_solver2(mat_flat, rhs)) -cdef double[:] c_penta_solver1(double[:, :] mat_flat, double[:] rhs): - - cdef int mat_j = mat_flat.shape[1] - - cdef double[:] result = np.zeros(mat_j) +### Solver Algorithm 1 ### - cdef double[:] al = np.zeros(mat_j) - cdef double[:] be = np.zeros(mat_j) - cdef double[:] ze = np.zeros(mat_j) - cdef double[:] ga = np.zeros(mat_j) - cdef double[:] mu = np.zeros(mat_j) - cdef int i +cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and + the right-hand side ``b`` by - mu[0] = mat_flat[2, 0] - al[0] = mat_flat[1, 0] / mu[0] - be[0] = mat_flat[0, 0] / mu[0] - ze[0] = rhs[0] / mu[0] + - factorizing the matrix ``A`` into auxiliary coefficients and a unit upper + triangular matrix ``U`` + - transforming the right-hand side into a vector ``zeta`` + - solving the system of equations ``Ux = zeta`` by backward substitution - ga[1] = mat_flat[3, 1] - mu[1] = mat_flat[2, 1] - al[0] * ga[1] - al[1] = (mat_flat[1, 1] - be[0] * ga[1]) / mu[1] - be[1] = mat_flat[0, 1] / mu[1] - ze[1] = (rhs[1] - ze[0] * ga[1]) / mu[1] + """ - for i in range(2, mat_j-2): - ga[i] = mat_flat[3, i] - al[i-2] * mat_flat[4, i] - mu[i] = 
mat_flat[2, i] - be[i-2] * mat_flat[4, i] - al[i-1] * ga[i] - al[i] = (mat_flat[1, i] - be[i-1] * ga[i]) / mu[i] - be[i] = mat_flat[0, i] / mu[i] - ze[i] = (rhs[i] - ze[i-2] * mat_flat[4, i] - ze[i-1] * ga[i]) / mu[i] + cdef uint64_t mat_n_rows = mat_flat.shape[1] + cdef uint64_t rhs_n_cols = rhs.shape[1] + cdef uint64_t iter_col + cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols)) + cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5)) - ga[mat_j-2] = mat_flat[3, mat_j-2] - al[mat_j-4] * mat_flat[4, mat_j-2] - mu[mat_j-2] = mat_flat[2, mat_j-2] - be[mat_j-4] * mat_flat[4, mat_j-2] - al[mat_j-3] * ga[mat_j-2] - al[mat_j-2] = (mat_flat[1, mat_j-2] - be[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2] + # first, the matrix is factorized + c_penta_factorize_algo1( + mat_flat, + mat_n_rows, + mat_factorized, + ) - ga[mat_j-1] = mat_flat[3, mat_j-1] - al[mat_j-3] * mat_flat[4, mat_j-1] - mu[mat_j-1] = mat_flat[2, mat_j-1] - be[mat_j-3] * mat_flat[4, mat_j-1] - al[mat_j-2] * ga[mat_j-1] + # then, all the right-hand sides are solved + for iter_col in range(rhs_n_cols): + c_solve_penta_from_factorize_algo_1( + mat_n_rows, + mat_factorized, + rhs[::, iter_col], + result[::, iter_col], + ) - ze[mat_j-2] = (rhs[mat_j-2] - ze[mat_j-4] * mat_flat[4, mat_j-2] - ze[mat_j-3] * ga[mat_j-2]) / mu[mat_j-2] - ze[mat_j-1] = (rhs[mat_j-1] - ze[mat_j-3] * mat_flat[4, mat_j-1] - ze[mat_j-2] * ga[mat_j-1]) / mu[mat_j-1] - - # Backward substitution - result[mat_j-1] = ze[mat_j-1] - result[mat_j-2] = ze[mat_j-2] - al[mat_j-2] * result[mat_j-1] + return result - for i in range(mat_j-3, -1, -1): - result[i] = ze[i] - al[i] * result[i+1] - be[i] * result[i+2] - return result +cdef void c_penta_factorize_algo1( + double[:, :] mat_flat, + uint64_t mat_n_rows, + double[::, ::1] mat_factorized, +): + """ + Factorizes the pentadiagonal matrix ``A`` into + + - auxiliary coefficients ``e``, ``mu`` and ``gamma`` for the transformation of the + right-hand side + - a unit upper 
triangular matrix with the main diagonals ``alpha`` and ``beta`` + for the following backward substitution. Its unit main diagonal is implicit. + + They are overwriting the memoryview ``mat_factorized`` as follows: + + ```bash + [[ * mu_0 * al_0 be_0 ] + [ * mu_1 ga_1 al_1 be_1 ] + [ e_2 mu_2 ga_2 al_2 be_2 ] + ... + [ e_i mu_i ga_i al_i be_i ] + ... + [ e_{n-2} mu_{n-2} ga_{n-2} al_{n-2} * ] + [ e_{n-1} mu_{n-1} ga_{n-1} * * ]] + ``` + + where the entries marked with ``*`` are not used by design, but overwritten with + zeros. + + """ + + ### Variable declarations ### + + cdef uint64_t iter_row + cdef double mu_i, ga_i, e_i + cdef double al_i, al_i_minus_1, al_i_plus_1 + + ### Factorization ### + + # First row + mu_i = mat_flat[2, 0] + al_i_minus_1 = mat_flat[1, 0] / mu_i + be_i_minus_1 = mat_flat[0, 0] / mu_i + + mat_factorized[0, 0] = 0.0 + mat_factorized[0, 1] = mu_i + mat_factorized[0, 2] = 0.0 + mat_factorized[0, 3] = al_i_minus_1 + mat_factorized[0, 4] = be_i_minus_1 + + # Second row + ga_i = mat_flat[3, 1] + mu_i = mat_flat[2, 1] - al_i_minus_1 * ga_i + al_i = (mat_flat[1, 1] - be_i_minus_1 * ga_i) / mu_i + be_i = mat_flat[0, 1] / mu_i + + mat_factorized[1, 0] = 0.0 + mat_factorized[1, 1] = mu_i + mat_factorized[1, 2] = ga_i + mat_factorized[1, 3] = al_i + mat_factorized[1, 4] = be_i + + # Central rows + for iter_row in range(2, mat_n_rows-2): + e_i = mat_flat[4, iter_row] + ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i + mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i + + al_i_plus_1 = (mat_flat[1, iter_row] - be_i * ga_i) / mu_i + al_i_minus_1 = al_i + al_i = al_i_plus_1 + + be_i_plus_1 = mat_flat[0, iter_row] / mu_i + be_i_minus_1 = be_i + be_i = be_i_plus_1 + + mat_factorized[iter_row, 0] = e_i + mat_factorized[iter_row, 1] = mu_i + mat_factorized[iter_row, 2] = ga_i + mat_factorized[iter_row, 3] = al_i + mat_factorized[iter_row, 4] = be_i + + # Second to last row + e_i = mat_flat[4, mat_n_rows-2] + ga_i = mat_flat[3, mat_n_rows-2] - 
al_i_minus_1 * e_i + mu_i = mat_flat[2, mat_n_rows-2] - be_i_minus_1 * e_i - al_i * ga_i + al_i_plus_1 = (mat_flat[1, mat_n_rows-2] - be_i * ga_i) / mu_i + + mat_factorized[mat_n_rows-2, 0] = e_i + mat_factorized[mat_n_rows-2, 1] = mu_i + mat_factorized[mat_n_rows-2, 2] = ga_i + mat_factorized[mat_n_rows-2, 3] = al_i_plus_1 + mat_factorized[mat_n_rows-2, 4] = 0.0 + + # Last Row + e_i = mat_flat[4, mat_n_rows-1] + ga_i = mat_flat[3, mat_n_rows-1] - al_i * e_i + mu_i = mat_flat[2, mat_n_rows-1] - be_i * e_i - al_i_plus_1 * ga_i + + mat_factorized[mat_n_rows-1, 0] = e_i + mat_factorized[mat_n_rows-1, 1] = mu_i + mat_factorized[mat_n_rows-1, 2] = ga_i + mat_factorized[mat_n_rows-1, 3] = 0.0 + mat_factorized[mat_n_rows-1, 4] = 0.0 + + return + + +cdef void c_solve_penta_from_factorize_algo_1( + uint64_t mat_n_rows, + double[::, ::1] mat_factorized, + double[::] rhs_single, + double[::] result_view, +): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the factorized + unit upper triangular matrix ``U`` and the right-hand side ``b``. + It overwrites the right-hand side ``b`` first with the transformed vector ``zeta`` + and then with the solution vector ``x`` for ``Ux = zeta``. 
+ + """ + + ### Variable declarations ### + + cdef int64_t iter_row + cdef double ze_i, ze_i_minus_1, ze_i_plus_1 + + ### Transformation ### + + # first, the right-hand side is transformed into the vector ``zeta`` + # First row + + ze_i_minus_1 = rhs_single[0] / mat_factorized[0, 1] + result_view[0] = ze_i_minus_1 + + # Second row + ze_i = (rhs_single[1] - ze_i_minus_1 * mat_factorized[1, 2]) / mat_factorized[1, 1] + result_view[1] = ze_i + + # Central rows + for iter_row in range(2, mat_n_rows-2): + ze_i_plus_1 = ( + rhs_single[iter_row] + - ze_i_minus_1 * mat_factorized[iter_row, 0] + - ze_i * mat_factorized[iter_row, 2] + ) / mat_factorized[iter_row, 1] + ze_i_minus_1 = ze_i + ze_i = ze_i_plus_1 + result_view[iter_row] = ze_i_plus_1 + + # Second to last row + ze_i_plus_1 = ( + rhs_single[mat_n_rows-2] + - ze_i_minus_1 * mat_factorized[mat_n_rows-2, 0] + - ze_i * mat_factorized[mat_n_rows-2, 2] + ) / mat_factorized[mat_n_rows-2, 1] + ze_i_minus_1 = ze_i + ze_i = ze_i_plus_1 + result_view[mat_n_rows-2] = ze_i_plus_1 + + # Last row + ze_i_plus_1 = ( + rhs_single[mat_n_rows-1] + - ze_i_minus_1 * mat_factorized[mat_n_rows-1, 0] + - ze_i * mat_factorized[mat_n_rows-1, 2] + ) / mat_factorized[mat_n_rows-1, 1] + result_view[mat_n_rows-1] = ze_i_plus_1 + + ### Backward substitution ### + + # The solution vector is calculated by backward substitution that overwrites the + # right-hand side vector with the solution vector + ze_i -= mat_factorized[mat_n_rows-2, 3] * ze_i_plus_1 + result_view[mat_n_rows-2] = ze_i + + for iter_row in range(mat_n_rows-3, -1, -1): + result_view[iter_row] -= ( + mat_factorized[iter_row, 3] * ze_i + + mat_factorized[iter_row, 4] * ze_i_plus_1 + ) + ze_i_plus_1 = ze_i + ze_i = result_view[iter_row] + + return + + +### Solver Algorithm 2 ### cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs): From 328d9c90492ec4dd10a3a62f50917547c63082eb Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 7 Jun 2024 21:40:29 +0200 Subject: [PATCH 
03/62] feat: [11] added doctest runs to `pytest` --- pytest.ini | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..2bed0f3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --doctest-modules \ No newline at end of file From 90bdeb143f8677427d4255e332a00d83b51a40cf Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 7 Jun 2024 22:33:59 +0200 Subject: [PATCH 04/62] tests: [11] replaced single tools test by scalable parametrized tools test that test more different cases --- tests/test_tools.py | 205 ++++++++++++++++++++++++++++++++++++++++++++ tests/util_funcs.py | 175 +++++++++++++++++++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 tests/test_tools.py create mode 100644 tests/util_funcs.py diff --git a/tests/test_tools.py b/tests/test_tools.py new file mode 100644 index 0000000..879a713 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,205 @@ +""" +This test suite implements the test for the ``tools`` module of the ``pentapy`` package. + +""" + +### Imports ### + +import warnings +from typing import Optional, Tuple, Type + +import numpy as np +import pentapy as pp +import pytest +import util_funcs as uf +from pentapy.tools import _check_penta + +warnings.simplefilter("always") + +### Constants ### + +SEED = 19_031_977 +N_ROWS = [ + 3, + 4, + 5, + 10, + 11, + 25, + 26, + 50, + 51, + 100, + 101, + 250, + 251, + 500, + 501, + 1_000, + 1_001, + 10_000, + 10_001, +] + +### Tests ### + + +@pytest.mark.parametrize("offset", [0, 1, 2, -1, -2]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_diag_indices(n_rows: int, offset: int) -> None: + """ + Tests the generation of the diagonal indices via the function + ``pentapy.diag_indices``. 
+ + """ + + # the diagonal indices are obtained with NumPy and pentapy + row_idxs_ref, col_idxs_ref = uf.get_diag_indices(n=n_rows, offset=offset) + row_idxs, col_idxs = pp.diag_indices(n=n_rows, offset=offset) + + # the diagonal indices are compared + assert np.array_equal(row_idxs_ref, row_idxs) + assert np.array_equal(col_idxs_ref, col_idxs) + + +@pytest.mark.parametrize("copy", [True, False]) +@pytest.mark.parametrize("with_shift", [True, False]) +@pytest.mark.parametrize("col_wise", [True, False]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_penta_generators( + n_rows: int, + col_wise: bool, + with_shift: bool, + copy: bool, +) -> None: + """ + Tests the generation of pentadiagonal matrices where the matrix. + + """ + + # a reference matrix is initialised + mat_full_ref = uf.gen_rand_penta_matrix_dense_int( + n_rows=n_rows, + seed=SEED, + with_pentapy_indices=False, + ) + + # then, it is turned into a banded matrix ... + mat_banded = pp.create_banded(mat_full_ref, col_wise=col_wise) + + # ... which is maybe shifted + # Case 1: copied shift + if with_shift and copy: + mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=True) + col_wise = not col_wise + + # Case 2: in-place shift + if with_shift and not copy: + mat_banded = pp.shift_banded(mat_banded, col_to_row=col_wise, copy=False) + col_wise = not col_wise + + # ... 
from which a full matrix is created again + mat_full = pp.create_full(mat_banded, col_wise=col_wise) + + # the matrices are compared + assert np.array_equal(mat_full_ref, mat_full) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 5), None), # Valid 2D Array with 5 rows and 5 rows + ((5, 2), ValueError), # 2D Array with 5 rows but only 2 columns + ((2, 5), ValueError), # 2D Array with 2 rows but 5 columns + ((5,), ValueError), # 1D Array + ], +) +def test_create_banded_raises( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.create_banded`` raises the expected exceptions. + + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + pp.create_banded(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + pp.create_banded(mat) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 5), None), # Valid 2D Array with 5 bands and 5 columns + ((5, 10), None), # Valid 2D Array with 5 bands and 10 columns + ((5, 3), None), # 2D Array with 5 bands and the minimum number of columns + ((6, 20), ValueError), # 2D Array does not have 5 bands + ((4, 30), ValueError), # 2D Array does not have 5 bands + ((5, 1), ValueError), # 2D Array with 5 bands but too little columns + ((5, 2), ValueError), # 2D Array with 5 bands but too little columns + ((5,), ValueError), # 1D Array + ], +) +def test_create_full_raises( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.create_full`` raises the expected exceptions. 
+ + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + pp.create_full(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + pp.create_full(mat) + + +@pytest.mark.parametrize( + "shape, exception", + [ + ((5, 3), None), # Valid 2D Array with 5 bands and 3 rows + ((5, 2), ValueError), # 2D Array with 5 bands but less than 3 rows + ((4, 3), ValueError), # 2D Array with less than 5 bands + ((5,), ValueError), # 1D Array + ], +) +def test_check_penta( + shape: Tuple[int, ...], + exception: Optional[Type[Exception]], +) -> None: + """ + Test if the function ``pentapy.tools._check_penta`` raises the expected exceptions. + + """ + + # the test matrix is initialised + np.random.seed(SEED) + mat = np.random.rand(*shape) + + # Case 1: no exception should be raised + if exception is None: + _check_penta(mat) + return + + # Case 2: an exception should be raised + with pytest.raises(exception): + _check_penta(mat) diff --git a/tests/util_funcs.py b/tests/util_funcs.py new file mode 100644 index 0000000..2402d25 --- /dev/null +++ b/tests/util_funcs.py @@ -0,0 +1,175 @@ +""" +This test suite implements the utility functions for testing the ``pentapy`` package. + +""" + +### Imports ### + +from functools import partial +from typing import Tuple + +import numpy as np +import pentapy as pp + +### Utility Functions ### + + +def get_diag_indices( + n: int, + offset: int, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Computes the row and column indices of the diagonal of a matrix ``mat``. 
+ + This answer is based on the Stack Overflow answer that can be found at: + https://stackoverflow.com/a/18081653/14814813 + + Doctests + -------- + >>> # Setting up a test matrix + >>> n_rows = 5 + >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows) + + >>> # Getting the main diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=0) + >>> row_idxs + array([0, 1, 2, 3, 4]) + >>> col_idxs + array([0, 1, 2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 0, 6, 12, 18, 24]) + + >>> # Getting the first upper diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=1) + >>> row_idxs + array([0, 1, 2, 3]) + >>> col_idxs + array([1, 2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 1, 7, 13, 19]) + + >>> # Getting the second upper diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=2) + >>> row_idxs + array([0, 1, 2]) + >>> col_idxs + array([2, 3, 4]) + >>> mat[row_idxs, col_idxs] + array([ 2, 8, 14]) + + >>> # Getting the first lower diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-1) + >>> row_idxs + array([1, 2, 3, 4]) + >>> col_idxs + array([0, 1, 2, 3]) + >>> mat[row_idxs, col_idxs] + array([ 5, 11, 17, 23]) + + >>> # Getting the second lower diagonal indices + >>> row_idxs, col_idxs = get_diag_indices(n=n_rows, offset=-2) + >>> row_idxs + array([2, 3, 4]) + >>> col_idxs + array([0, 1, 2]) + >>> mat[row_idxs, col_idxs] + array([10, 16, 22]) + + """ + + row_idxs, col_idxs = np.diag_indices(n=n, ndim=2) + if offset < 0: + row_idx_from = -offset + row_idx_to = None + col_idx_from = 0 + col_idx_to = offset + elif offset > 0: + row_idx_from = 0 + row_idx_to = -offset + col_idx_from = offset + col_idx_to = None + else: + row_idx_from = None + row_idx_to = None + col_idx_from = None + col_idx_to = None + + return ( + row_idxs[row_idx_from:row_idx_to], + col_idxs[col_idx_from:col_idx_to], + ) + + +def gen_rand_penta_matrix_dense_int( + n_rows: int, + 
seed: int, + with_pentapy_indices: bool, +) -> np.ndarray: + """ + Generates a random dense pentadiagonal matrix with shape ``(n_rows, n_rows)`` and + data type ``int64``. + + Doctests + -------- + >>> # Generating a random pentadiagonal matrix with NumPy indices + >>> n_rows = 5 + >>> seed = 19_031_977 + >>> with_pentapy_indices = False + + >>> mat_no_pentapy = gen_rand_penta_matrix_dense_int( + ... n_rows=n_rows, + ... seed=seed, + ... with_pentapy_indices=with_pentapy_indices + ... ) + >>> mat_no_pentapy + array([[117, 499, 43, 0, 0], + [378, 149, 857, 353, 0], + [285, 769, 767, 229, 484], + [ 0, 717, 214, 243, 877], + [ 0, 0, 410, 611, 79]], dtype=int64) + + >>> # Generating a random pentadiagonal matrix with pentapy indices + >>> mat_with_pentapy = gen_rand_penta_matrix_dense_int( + ... n_rows=n_rows, + ... seed=seed, + ... with_pentapy_indices=True + ... ) + >>> mat_with_pentapy + array([[117, 499, 43, 0, 0], + [378, 149, 857, 353, 0], + [285, 769, 767, 229, 484], + [ 0, 717, 214, 243, 877], + [ 0, 0, 410, 611, 79]], dtype=int64) + + >>> # Checking if the two matrices are equal + >>> np.array_equal(mat_no_pentapy, mat_with_pentapy) + True + + """ + + # first, a matrix of zeros is initialised ... + mat = np.zeros((n_rows, n_rows), dtype=np.int64) + # ... 
together with a partially specified random vector generator + # NOTE: this ensures consistent random numbers for both cases + gen_rand_int = partial(np.random.randint, low=1, high=1_000) + + # then, the diagonal index function is obtained + diag_idx_func = get_diag_indices + if with_pentapy_indices: + diag_idx_func = pp.diag_indices + + # then, the diagonals are filled with random integers + np.random.seed(seed=seed) + for offset in range(-2, 3): + row_idxs, col_idxs = diag_idx_func(n=n_rows, offset=offset) + mat[row_idxs, col_idxs] = gen_rand_int(size=n_rows - abs(offset)) + + return mat + + +### Doctests ### + +if __name__ == "__main__": # pragma: no cover + import doctest + + doctest.testmod() From 50806411c1087036776576f7c4e68a05d0f3b6a6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 7 Jun 2024 22:35:43 +0200 Subject: [PATCH 05/62] tests: [11] removed version from coverage --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7b0aec6..4c400f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,7 @@ max-line-length = 120 "*examples*", "*tests*", "*paper*", + "pentapy/src/pentapy/_version.py", ] [tool.coverage.report] From 2f576092dfc0bb52e4ab268248e0673dc0eda667 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 7 Jun 2024 23:02:53 +0200 Subject: [PATCH 06/62] lint: [11] fixed block comment lint error --- src/pentapy/solver.pyx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index c59226b..bf075da 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -6,7 +6,7 @@ implemented in Cython. 
""" -### Imports ### +# Imports import numpy as np @@ -14,7 +14,7 @@ cimport numpy as np from libc.stdint cimport int64_t, uint64_t -### Main Python Interface ### +# Main Python Interface def penta_solver1(double[:, :] mat_flat, double[:, :] rhs): @@ -25,7 +25,7 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs): return np.asarray(c_penta_solver2(mat_flat, rhs)) -### Solver Algorithm 1 ### +# Solver Algorithm 1 cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): @@ -96,13 +96,13 @@ cdef void c_penta_factorize_algo1( """ - ### Variable declarations ### + # Variable declarations cdef uint64_t iter_row cdef double mu_i, ga_i, e_i cdef double al_i, al_i_minus_1, al_i_plus_1 - ### Factorization ### + # Factorization # First row mu_i = mat_flat[2, 0] @@ -187,12 +187,12 @@ cdef void c_solve_penta_from_factorize_algo_1( """ - ### Variable declarations ### + # Variable declarations cdef int64_t iter_row cdef double ze_i, ze_i_minus_1, ze_i_plus_1 - ### Transformation ### + # Transformation # first, the right-hand side is transformed into the vector ``zeta`` # First row @@ -233,7 +233,7 @@ cdef void c_solve_penta_from_factorize_algo_1( ) / mat_factorized[mat_n_rows-1, 1] result_view[mat_n_rows-1] = ze_i_plus_1 - ### Backward substitution ### + # Backward substitution # The solution vector is calculated by backward substitution that overwrites the # right-hand side vector with the solution vector @@ -251,7 +251,7 @@ cdef void c_solve_penta_from_factorize_algo_1( return -### Solver Algorithm 2 ### +# Solver Algorithm 2 cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs): From 8db6ba4841b697725a60ba7d205b7db59f4b3d79 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 10:34:23 +0200 Subject: [PATCH 07/62] style: [11] improved headlines --- src/pentapy/solver.pyx | 16 ++++++++-------- tests/test_tools.py | 6 +++--- tests/util_funcs.py | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git 
a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index bf075da..c8a22a2 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -14,7 +14,7 @@ cimport numpy as np from libc.stdint cimport int64_t, uint64_t -# Main Python Interface +# === Main Python Interface === def penta_solver1(double[:, :] mat_flat, double[:, :] rhs): @@ -25,7 +25,7 @@ def penta_solver2(double[:, :] mat_flat, double[:] rhs): return np.asarray(c_penta_solver2(mat_flat, rhs)) -# Solver Algorithm 1 +# === Solver Algorithm 1 === cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): @@ -96,13 +96,13 @@ cdef void c_penta_factorize_algo1( """ - # Variable declarations + # === Variable declarations === cdef uint64_t iter_row cdef double mu_i, ga_i, e_i cdef double al_i, al_i_minus_1, al_i_plus_1 - # Factorization + # === Factorization === # First row mu_i = mat_flat[2, 0] @@ -187,12 +187,12 @@ cdef void c_solve_penta_from_factorize_algo_1( """ - # Variable declarations + # === Variable declarations === cdef int64_t iter_row cdef double ze_i, ze_i_minus_1, ze_i_plus_1 - # Transformation + # === Transformation === # first, the right-hand side is transformed into the vector ``zeta`` # First row @@ -233,7 +233,7 @@ cdef void c_solve_penta_from_factorize_algo_1( ) / mat_factorized[mat_n_rows-1, 1] result_view[mat_n_rows-1] = ze_i_plus_1 - # Backward substitution + # === Backward substitution === # The solution vector is calculated by backward substitution that overwrites the # right-hand side vector with the solution vector @@ -251,7 +251,7 @@ cdef void c_solve_penta_from_factorize_algo_1( return -# Solver Algorithm 2 +# === Solver Algorithm 2 === cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs): diff --git a/tests/test_tools.py b/tests/test_tools.py index 879a713..2f54c48 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -3,7 +3,7 @@ """ -### Imports ### +# === Imports === import warnings from typing import Optional, Tuple, Type @@ -16,7 
+16,7 @@ warnings.simplefilter("always") -### Constants ### +# === Constants === SEED = 19_031_977 N_ROWS = [ @@ -41,7 +41,7 @@ 10_001, ] -### Tests ### +# === Tests === @pytest.mark.parametrize("offset", [0, 1, 2, -1, -2]) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index 2402d25..594e75d 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -3,7 +3,7 @@ """ -### Imports ### +# === Imports === from functools import partial from typing import Tuple @@ -11,7 +11,7 @@ import numpy as np import pentapy as pp -### Utility Functions ### +# === Utility Functions === def get_diag_indices( @@ -25,7 +25,7 @@ def get_diag_indices( https://stackoverflow.com/a/18081653/14814813 Doctests - -------- + ======-- >>> # Setting up a test matrix >>> n_rows = 5 >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows) @@ -110,7 +110,7 @@ def gen_rand_penta_matrix_dense_int( data type ``int64``. Doctests - -------- + ======-- >>> # Generating a random pentadiagonal matrix with NumPy indices >>> n_rows = 5 >>> seed = 19_031_977 @@ -167,7 +167,7 @@ def gen_rand_penta_matrix_dense_int( return mat -### Doctests ### +# === Doctests === if __name__ == "__main__": # pragma: no cover import doctest From 1cdd525f8a93611e7c369253b724572388841819 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 10:35:27 +0200 Subject: [PATCH 08/62] wip: [11] formatted --- tests/util_funcs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index 594e75d..040bd04 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -9,6 +9,7 @@ from typing import Tuple import numpy as np + import pentapy as pp # === Utility Functions === From ae656cf94f38b4651545161e848443a97930b53f Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 11:13:44 +0200 Subject: [PATCH 09/62] test: [11] created conditioned banded matrix creator for testing purposes --- tests/util_funcs.py | 199 +++++++++++++++++++++++++++++++++++++++++++- 1 file 
changed, 197 insertions(+), 2 deletions(-) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index 040bd04..ca8f2be 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -9,9 +9,15 @@ from typing import Tuple import numpy as np +from scipy import sparse as sprs import pentapy as pp +# === Constants === + +_MIN_DIAG_VAL = 1e-3 + + # === Utility Functions === @@ -26,7 +32,7 @@ def get_diag_indices( https://stackoverflow.com/a/18081653/14814813 Doctests - ======-- + -------- >>> # Setting up a test matrix >>> n_rows = 5 >>> mat = np.arange(start=0, stop=n_rows * n_rows).reshape(n_rows, n_rows) @@ -111,7 +117,7 @@ def gen_rand_penta_matrix_dense_int( data type ``int64``. Doctests - ======-- + -------- >>> # Generating a random pentadiagonal matrix with NumPy indices >>> n_rows = 5 >>> seed = 19_031_977 @@ -168,6 +174,195 @@ def gen_rand_penta_matrix_dense_int( return mat +def gen_conditioned_rand_penta_matrix_dense( + n_rows: int, + seed: int, + ill_conditioned: bool, +) -> np.ndarray: + """ + Generates a well- or ill-conditioned random banded pentadiagonal matrix with shape + ``(n_rows, n_rows)``. + + This is achieved as follows: + - a fake LDU decomposition is generated where ``L`` and ``U`` are unit lower and + upper triangular matrices, respectively, and ``D`` is a diagonal matrix + - the matrix is then reconstructed by multiplying the three matrices and converting + the result to a banded matrix + + If ``D`` does not have any zeros or values of small magnitude compared to the + largest value, the matrix should be well-conditioned. + Otherwise, it is ill-conditioned. + + Doctests + -------- + >>> # Imports + >>> from scipy.linalg import bandwidth + + >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix + >>> n_rows = 3 + >>> seed = 19_031_977 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... 
) + >>> mat + array([[ 0.92453713, 0.28308514, -0.09972199], + [-0.09784268, 0.2270634 , -0.1509019 ], + [-0.23431267, 0.00468463, 0.22991003]]) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> np.linalg.cond(mat) + 4.976880305142543 + + >>> # 2) Generating a super small ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... ) + >>> mat + array([[ 0.92453713, 0.28308514, -0.09972199], + [-0.09784268, 0.2270634 , -0.1509019 ], + [-0.23431267, 0.00468463, -0.02273771]]) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> np.linalg.cond(mat) + 1.493156437173682e+17 + + >>> # 3) Generating a small well-conditioned random pentadiagonal matrix + >>> n_rows = 7 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> np.round(mat, 2) + array([[ 0.92, -0.72, 0.73, 0. , 0. , 0. , 0. ], + [ 0.83, -0.02, 1.08, 0.41, 0. , 0. , 0. ], + [-0.58, 0.13, -0.13, -0.37, 0.18, 0. , 0. ], + [ 0. , -0.07, -0.58, 0.46, -0.31, 0.28, 0. ], + [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], + [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], + [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.53]]) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> np.linalg.cond(mat) + 42.4847446467131 + + >>> # 4) Generating a small ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... 
) + >>> np.round(mat, 2) + array([[ 0.92, -0.72, 0.73, 0. , 0. , 0. , 0. ], + [ 0.83, -0.02, 1.08, 0.41, 0. , 0. , 0. ], + [-0.58, 0.13, -0.13, -0.37, 0.18, 0. , 0. ], + [ 0. , -0.07, -0.58, 0.46, -0.31, 0.28, 0. ], + [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], + [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], + [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.28]]) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> np.linalg.cond(mat) + 1.1079218802103074e+17 + + >>> # 5) Generating a large well-conditioned random pentadiagonal matrix + >>> n_rows = 1_000 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and values below 1e10 can be considered good + >>> np.linalg.cond(mat) + 9570.995402466417 + + >>> # 6) Generating a large ill-conditioned random pentadiagonal matrix + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=True, + ... 
) + >>> # its bandwidth is computed and should be equal to 2 + >>> bandwidth(mat) + (2, 2) + >>> # its condition number is computed and its value should be close to the + >>> # reciprocal floating point precision, i.e., ~1e16 + >>> np.linalg.cond(mat) + 5.058722571393928e+17 + + """ + + # first, the fake diagonal matrix is generated whose entries are strictly + # positive and sorted in descending order + np.random.seed(seed=seed) + d_diag = np.flip(np.sort(np.random.rand(n_rows))) + + # the conditioning is achieved by manipulating the smallest diagonal entry + # Case 1: well-conditioned matrix + if not ill_conditioned: + # here, the smallest diagonal entry is set to a value that is enforced to have + # a minimum magnitude + d_diag = np.maximum(d_diag, _MIN_DIAG_VAL) + + # Case 2: ill-conditioned matrix + else: + # here, the smallest diagonal entry is set to a value that is numerically zero + # compared to the largest entry + d_diag[n_rows - 1] = 0.1 * np.finfo(np.float64).eps * d_diag[0] + + # ... followed by a unit lower triangular matrix with 2 sub-diagonals, but here + # the entries may be negative ... + diagonals = [ + 1.0 - 2.0 * np.random.rand(n_rows - 2), + 1.0 - 2.0 * np.random.rand(n_rows - 1), + np.ones(n_rows), + ] + l_mat = sprs.diags( + diagonals=diagonals, + offsets=[-2, -1, 0], # type: ignore + shape=(n_rows, n_rows), + format="csr", + dtype=np.float64, + ) + + # ... 
and an upper triangular matrix with 2 super-diagonals + diagonals = [ + np.ones(n_rows), + 1.0 - 2.0 * np.random.rand(n_rows - 1), + 1.0 - 2.0 * np.random.rand(n_rows - 2), + ] + u_mat = sprs.diags( + diagonals=diagonals, + offsets=[0, 1, 2], # type: ignore + shape=(n_rows, n_rows), + format="csr", + dtype=np.float64, + ) + + # finally, the matrix is reconstructed by multiplying the three matrices + return (l_mat.multiply(d_diag[np.newaxis, ::]).dot(u_mat)).toarray() + + # === Doctests === if __name__ == "__main__": # pragma: no cover From 26752f292f569e96b0a7b39d5a4fb95e695b6f72 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 11:44:29 +0200 Subject: [PATCH 10/62] tests: [11] add doctested reference solver; made ill-conditioning more severe --- tests/util_funcs.py | 85 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 13 deletions(-) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index ca8f2be..8ef8efa 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -9,6 +9,7 @@ from typing import Tuple import numpy as np +from scipy import linalg as spla from scipy import sparse as sprs import pentapy as pp @@ -195,9 +196,6 @@ def gen_conditioned_rand_penta_matrix_dense( Doctests -------- - >>> # Imports - >>> from scipy.linalg import bandwidth - >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix >>> n_rows = 3 >>> seed = 19_031_977 @@ -212,7 +210,7 @@ def gen_conditioned_rand_penta_matrix_dense( [-0.09784268, 0.2270634 , -0.1509019 ], [-0.23431267, 0.00468463, 0.22991003]]) >>> # its bandwidth is computed and should be equal to 2 - >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good >>> np.linalg.cond(mat) @@ -229,7 +227,7 @@ def gen_conditioned_rand_penta_matrix_dense( [-0.09784268, 0.2270634 , -0.1509019 ], [-0.23431267, 0.00468463, -0.02273771]]) >>> # its bandwidth is computed and should be equal to 2 
- >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 @@ -253,7 +251,7 @@ def gen_conditioned_rand_penta_matrix_dense( [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.53]]) >>> # its bandwidth is computed and should be equal to 2 - >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good >>> np.linalg.cond(mat) @@ -274,7 +272,7 @@ def gen_conditioned_rand_penta_matrix_dense( [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.28]]) >>> # its bandwidth is computed and should be equal to 2 - >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 @@ -290,7 +288,7 @@ def gen_conditioned_rand_penta_matrix_dense( ... ill_conditioned=False, ... ) >>> # its bandwidth is computed and should be equal to 2 - >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good >>> np.linalg.cond(mat) @@ -303,12 +301,12 @@ def gen_conditioned_rand_penta_matrix_dense( ... ill_conditioned=True, ... 
) >>> # its bandwidth is computed and should be equal to 2 - >>> bandwidth(mat) + >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 >>> np.linalg.cond(mat) - 5.058722571393928e+17 + 1.7137059583101745e+19 """ @@ -326,9 +324,8 @@ def gen_conditioned_rand_penta_matrix_dense( # Case 2: ill-conditioned matrix else: - # here, the smallest diagonal entry is set to a value that is numerically zero - # compared to the largest entry - d_diag[n_rows - 1] = 0.1 * np.finfo(np.float64).eps * d_diag[0] + # here, the smallest diagonal entry is set to zero + d_diag[n_rows - 1] = 0.0 # ... followed by a unit lower triangular matrix with 2 sub-diagonals, but here # the entries may be negative ... @@ -363,6 +360,68 @@ def gen_conditioned_rand_penta_matrix_dense( return (l_mat.multiply(d_diag[np.newaxis, ::]).dot(u_mat)).toarray() +def solve_penta_matrix_dense_scipy( + mat: np.ndarray, + rhs: np.ndarray, +) -> np.ndarray: + """ + Solves a pentadiagonal matrix system using SciPy's banded solver. + + Doctests + -------- + >>> # Setting up a small test matrix and right-hand side + >>> n_rows = 5 + >>> seed = 19_031_977 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... 
) + >>> rhs = np.random.rand(n_rows, 5) + + >>> # Solving the system using SciPy's banded solver + >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs) + >>> np.round(sol, 2) + array([[-2.16, -0.36, 0.72, 0.23, -0.2 ], + [ 4.07, 1.3 , 0.81, 1.31, 0.48], + [ 4.05, 0.33, 2.19, 1.22, 0.58], + [-1.9 , -0.79, 1.02, -0.39, 1.02], + [ 6.31, 1.81, 1.29, 1.41, 0.37]]) + + >>> # the solution is checked by verifying that the residual is close to zero + >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows + True + + >>> # Setting up a large test matrix and right-hand side + >>> n_rows = 1_000 + + >>> mat = gen_conditioned_rand_penta_matrix_dense( + ... n_rows=n_rows, + ... seed=seed, + ... ill_conditioned=False, + ... ) + >>> rhs = np.random.rand(n_rows, 5) + + >>> # Solving the system using SciPy's banded solver + >>> sol = solve_penta_matrix_dense_scipy(mat=mat, rhs=rhs) + >>> # the solution is checked by verifying that the residual is close to zero + >>> np.max(np.abs(mat @ sol - rhs)) <= np.finfo(np.float64).eps * n_rows + True + + """ + + # first, the matrix is converted to LAPACK banded storage format + mat_banded = pp.create_banded(mat=mat, col_wise=True) + + # then, the system is solved using SciPy's banded solver + return spla.solve_banded( + l_and_u=(2, 2), + ab=mat_banded, + b=rhs, + ) + + # === Doctests === if __name__ == "__main__": # pragma: no cover From a0c8849e05d96236e1a234af00095def29848efb Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 12:05:50 +0200 Subject: [PATCH 11/62] feat: [11] made error messages informative --- src/pentapy/tools.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index 3db7126..ca80d27 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -172,10 +172,14 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): """ mat = np.asanyarray(mat) if mat.ndim != 2: - msg = "create_banded: 
matrix has to be 2D" + msg = f"create_banded: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) + if mat.shape[0] != mat.shape[1]: - msg = "create_banded: matrix has to be n x n" + msg = ( + f"create_banded: matrix has to be n x n, " + f"got {mat.shape[0]} x {mat.shape[1]}" + ) raise ValueError(msg) size = mat.shape[0] @@ -246,14 +250,23 @@ def create_full(mat, up=2, low=2, col_wise=True): """ mat = np.asanyarray(mat) if mat.ndim != 2: - msg = "create_full: matrix has to be 2D" + msg = f"create_full: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) + if mat.shape[0] != up + low + 1: - msg = "create_full: matrix has wrong count of bands" + msg = ( + f"create_full: matrix has wrong count of bands, required " + f"{up} + {low} + 1 = {up + low + 1}, got {mat.shape[0]} bands" + ) raise ValueError(msg) + if mat.shape[1] < max(up, low) + 1: - msg = "create_full: matrix has to few information" + msg = ( + f"create_full: matrix has to few information, required " + f"{max(up, low) + 1} columns, got {mat.shape[1]} columns" + ) raise ValueError(msg) + size = mat.shape[1] mat_full = np.diag(mat[up]) if col_wise: @@ -266,16 +279,17 @@ def create_full(mat, up=2, low=2, col_wise=True): mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)] for i in range(low): mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :] + return mat_full def _check_penta(mat): if mat.ndim != 2: - msg = "pentapy: matrix has to be 2D" + msg = f"pentapy: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) if mat.shape[0] != 5: - msg = "pentapy: matrix needs 5 bands" + msg = f"pentapy: matrix needs 5 bands, got {mat.shape[0]} bands" raise ValueError(msg) if mat.shape[1] < 3: - msg = "pentapy: matrix needs at least 3 rows" + msg = f"pentapy: matrix needs at least 3 rows, got {mat.shape[1]} rows" raise ValueError(msg) From ee6943a2fae9d3fb676bc56e20c90f41b2b4d29b Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 13:23:46 +0200 Subject: [PATCH 12/62] fix: 
[23] disabled cdivision to fix error handling on Python side --- src/pentapy/solver.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index c8a22a2..091288c 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=True +# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=False """ This is a solver linear equation systems with a penta-diagonal matrix, From 028484372fcce5d445695cae86f9db884bbe4290 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 13:28:14 +0200 Subject: [PATCH 13/62] feat/refactor: [11] enabled multipe right-hand sides for solver I; improved import chain; improved code readability --- src/pentapy/core.py | 58 ++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 9a55780..2393122 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -5,8 +5,8 @@ import numpy as np +from pentapy import tools as ptools from pentapy.solver import penta_solver1, penta_solver2 -from pentapy.tools import _check_penta, create_banded, shift_banded def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): @@ -66,40 +66,60 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): result : :class:`numpy.ndarray` Solution of the equation system """ + if solver in [1, "1", "PTRANS-I"]: if is_flat and index_row_wise: mat_flat = np.asarray(mat, dtype=np.double) - _check_penta(mat_flat) + ptools._check_penta(mat_flat) elif is_flat: mat_flat = np.array(mat, dtype=np.double) - _check_penta(mat_flat) - shift_banded(mat_flat, copy=False) + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, copy=False) else: - mat_flat = create_banded(mat, col_wise=False, dtype=np.double) + mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) + rhs = 
np.asarray(rhs, dtype=np.double) + + # Special case: Early exit when the matrix has only 3 rows/columns + # NOTE: this avoids memory leakage in the Cython-solver that will iterate over + # at least 4 rows/columns no matter what + if mat_flat.shape[1] == 3: + return np.linalg.solve( + a=ptools.create_full(mat_flat, col_wise=False), + b=rhs, + ) + + # if there is only a single right-hand side, it has to be reshaped to a 2D array + # NOTE: this has to be reverted at the end single_rhs = rhs.ndim == 1 + rhs_og_shape = rhs.shape if single_rhs: rhs = rhs[:, np.newaxis] try: + # if there was only a 1D right-hand side, the result has to be flattened if single_rhs: return penta_solver1(mat_flat, rhs).ravel() return penta_solver1(mat_flat, rhs) + except ZeroDivisionError: warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") - return np.full_like(rhs, np.nan) + return np.full(shape=rhs_og_shape, fill_value=np.nan) + elif solver in [2, "2", "PTRANS-II"]: if is_flat and index_row_wise: mat_flat = np.asarray(mat, dtype=np.double) - _check_penta(mat_flat) + ptools._check_penta(mat_flat) elif is_flat: mat_flat = np.array(mat, dtype=np.double) - _check_penta(mat_flat) - shift_banded(mat_flat, copy=False) + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, copy=False) else: - mat_flat = create_banded(mat, col_wise=False, dtype=np.double) + mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) + rhs = np.asarray(rhs, dtype=np.double) + try: return penta_solver2(mat_flat, rhs) except ZeroDivisionError: @@ -113,12 +133,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): raise ValueError(msg) from imp_err if is_flat and index_row_wise: mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) else: - mat_flat = create_banded(mat) + mat_flat = 
ptools.create_banded(mat) return solve_banded((2, 2), mat_flat, rhs) elif solver in [4, "4", "spsolve"]: # pragma: no cover try: @@ -129,12 +149,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): raise ValueError(msg) from imp_err if is_flat and index_row_wise: mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) else: - mat_flat = create_banded(mat) + mat_flat = ptools.create_banded(mat) size = mat_flat.shape[1] M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") return spsolve(M, rhs, use_umfpack=False) @@ -153,12 +173,12 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): raise ValueError(msg) from imp_err if is_flat and index_row_wise: mat_flat = np.array(mat) - _check_penta(mat_flat) - shift_banded(mat_flat, col_to_row=False, copy=False) + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) else: - mat_flat = create_banded(mat) + mat_flat = ptools.create_banded(mat) size = mat_flat.shape[1] M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") return spsolve(M, rhs, use_umfpack=True) From c32bec2744b08d455627197a56eca6927bfa1a0b Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 13:28:40 +0200 Subject: [PATCH 14/62] tests: [11] added shape check to doctest --- tests/util_funcs.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index 8ef8efa..be9f3c1 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -209,7 +209,9 @@ def gen_conditioned_rand_penta_matrix_dense( array([[ 0.92453713, 0.28308514, -0.09972199], [-0.09784268, 0.2270634 , -0.1509019 ], [-0.23431267, 0.00468463, 0.22991003]]) - >>> # its bandwidth is computed 
and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good @@ -226,7 +228,9 @@ def gen_conditioned_rand_penta_matrix_dense( array([[ 0.92453713, 0.28308514, -0.09972199], [-0.09784268, 0.2270634 , -0.1509019 ], [-0.23431267, 0.00468463, -0.02273771]]) - >>> # its bandwidth is computed and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the @@ -250,7 +254,9 @@ def gen_conditioned_rand_penta_matrix_dense( [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.53]]) - >>> # its bandwidth is computed and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good @@ -271,7 +277,9 @@ def gen_conditioned_rand_penta_matrix_dense( [ 0. , 0. , 0.43, 0.13, 0.39, -0.1 , -0.15], [ 0. , 0. , 0. , 0.06, -0.14, 0.4 , 0.28], [ 0. , 0. , 0. , 0. , -0.14, 0.36, 0.28]]) - >>> # its bandwidth is computed and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the @@ -287,7 +295,9 @@ def gen_conditioned_rand_penta_matrix_dense( ... seed=seed, ... ill_conditioned=False, ... 
) - >>> # its bandwidth is computed and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good @@ -300,7 +310,9 @@ def gen_conditioned_rand_penta_matrix_dense( ... seed=seed, ... ill_conditioned=True, ... ) - >>> # its bandwidth is computed and should be equal to 2 + >>> # it has to be square and its bandwidth is computed and should be equal to 2 + >>> mat.shape[0] == mat.shape[1] + True >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and its value should be close to the From e7d92c86e67ad2cc77a9e293e9d35c46f81ac624 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 13:29:57 +0200 Subject: [PATCH 15/62] tests: [11] added extensive parametrized tests for solver I that also cover the edge cases --- tests/test_solver_1.py | 146 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/test_solver_1.py diff --git a/tests/test_solver_1.py b/tests/test_solver_1.py new file mode 100644 index 0000000..df00f9d --- /dev/null +++ b/tests/test_solver_1.py @@ -0,0 +1,146 @@ +""" +Test suite for testing the pentadiagonal solver based on Algorithm PTRANS-I. + +""" + +# === Imports === + +from typing import Literal + +import numpy as np +import pentapy as pp +import pytest +import util_funcs as uf + +# === Constants === + +SEED = 19_031_977 +N_ROWS = [ + 3, + 4, + 5, + 10, + 11, + 25, + 26, + 50, + 51, + 100, + 101, + 250, + 251, + 500, + 501, + 1_000, + 1_001, + 10_000, + 10_001, +] +REF_WARNING = "pentapy: PTRANS-I not suitable for input-matrix." 
+ +# === Tests === + + +@pytest.mark.parametrize("induce_error", [False, True]) +@pytest.mark.parametrize("solver_alias", [1]) # "1", "PTRANS-I"]) +@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) +@pytest.mark.parametrize("n_rhs", [None, 1, 10]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_penta_solver1( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[1, "1", "PTRANS-I"], + induce_error: bool, +) -> None: + """ + Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from + different input layouts, number of right-hand sides, number of rows, and also + when inducing an error by making the first diagonal element zero. + It has to be ensured that the edge case of ``n_rows = 3`` is also covered. + + """ + + # first, a random pentadiagonal matrix is generated + mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # an error is induced by setting the first diagonal element to zero + if induce_error: + # the induction of the error is only possible if the matrix does not have + # only 3 rows + if n_rows == 3: + pytest.skip( + "Only 3 rows, cannot induce error because this will not go into " + "PTRANS-I, but NumPy" + ) + + mat_full[0, 0] = 0.0 + + # the right-hand side is generated + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows, n_rhs) + result_shape = (n_rows, n_rhs) + else: + rhs = np.random.rand(n_rows) + result_shape = (n_rows,) + + # the matrix is converted to the desired layout + if input_layout == "full": + mat = mat_full + kwargs = dict(is_flat=False) + + elif input_layout == "banded_row_wise": + mat = pp.create_banded(mat_full, col_wise=False) + kwargs = dict( + is_flat=True, + index_row_wise=True, + ) + + elif input_layout == "banded_col_wise": + mat = pp.create_banded(mat_full, col_wise=True) + kwargs = dict( + is_flat=True, + 
index_row_wise=False, + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + # the solution is computed + # Case 1: in case of an error, a warning has to be issued and the result has to + # be NaN + if induce_error: + with pytest.warns(UserWarning, match=REF_WARNING): + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape + assert np.isnan(sol).all() + + return + + # Case 2: in case of no error, the solution can be computed without any issues + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape + + # if no error was induced, the reference solution is computed with SciPy + sol_ref = uf.solve_penta_matrix_dense_scipy( + mat=mat_full, + rhs=rhs, + ) + + # the solutions are compared + assert np.allclose(sol, sol_ref) From 647012f05677306e911ef0edcfa1162785efbafe Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 14:11:28 +0200 Subject: [PATCH 16/62] tests: [11] added more intermediate sizes for tests --- tests/test_solver_1.py | 4 ++++ tests/test_tools.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_solver_1.py b/tests/test_solver_1.py index df00f9d..78956f6 100644 --- a/tests/test_solver_1.py +++ b/tests/test_solver_1.py @@ -33,6 +33,10 @@ 501, 1_000, 1_001, + 2500, + 2501, + 5_000, + 5_001, 10_000, 10_001, ] diff --git a/tests/test_tools.py b/tests/test_tools.py index 2f54c48..cabac61 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -9,9 +9,10 @@ from typing import Optional, Tuple, Type import numpy as np -import pentapy as pp import pytest import util_funcs as uf + +import pentapy as pp from pentapy.tools import _check_penta warnings.simplefilter("always") @@ -37,6 +38,10 @@ 501, 1_000, 1_001, + 2500, + 2501, + 5_000, + 5_001, 10_000, 10_001, ] From 8ab514f9404c803b4f6f368a5c21f607729d4201 Mon Sep 17 00:00:00 2001 From: MothNik Date: 
Sat, 8 Jun 2024 15:51:36 +0200 Subject: [PATCH 17/62] feat/fix: [11] added cython annotations to build process; fixed wrong f-string; fixed typo --- setup.py | 9 ++++++--- src/pentapy/.gitignore | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 src/pentapy/.gitignore diff --git a/setup.py b/setup.py index 8081e05..fc8648c 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,18 @@ -"""pentapy: A toolbox for pentadiagonal matrizes.""" +"""pentapy: A toolbox for pentadiagonal matrices.""" import os +import Cython.Compiler.Options import numpy as np from Cython.Build import cythonize from setuptools import Extension, setup +Cython.Compiler.Options.annotate = True + # cython extensions CY_MODULES = [ Extension( - name=f"pentapy.solver", + name="pentapy.solver", sources=[os.path.join("src", "pentapy", "solver.pyx")], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], @@ -17,7 +20,7 @@ ] setup( - ext_modules=cythonize(CY_MODULES), + ext_modules=cythonize(CY_MODULES, nthreads=1, annotate=True), package_data={"pentapy": ["*.pxd"]}, # include pxd files include_package_data=False, # ignore other files zip_safe=False, diff --git a/src/pentapy/.gitignore b/src/pentapy/.gitignore new file mode 100644 index 0000000..53cc0d6 --- /dev/null +++ b/src/pentapy/.gitignore @@ -0,0 +1,2 @@ +# Cython html files +*.html \ No newline at end of file From 5086959a21a974f697aa3212ecfb22d8671248d4 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 15:52:50 +0200 Subject: [PATCH 18/62] refactor/doc: fixed missing variable declarations; added clarifying comments --- src/pentapy/solver.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 091288c..9d45c90 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -73,10 +73,11 @@ cdef void c_penta_factorize_algo1( """ Factorizes the pentadiagonal matrix ``A`` into - - auxiliary 
coefficients ``e``, ``mu`` and ``gamma`` for the transformation of the - right-hand side - - a unit upper triangular matrix with the main diagonals ``alpha`` and ``beta`` - for the following backward substitution. Its unit main diagonal is implicit. + - auxiliary coefficients ``e``, ``mu`` and ``gamma`` (``ga``) for the transformation + of the right-hand side + - a unit upper triangular matrix with the main diagonals ``alpha``(``al``) and + ``beta`` (``be``) for the following backward substitution. Its unit main + diagonal is implicit. They are overwriting the memoryview ``mat_factorized`` as follows: @@ -99,8 +100,9 @@ cdef void c_penta_factorize_algo1( # === Variable declarations === cdef uint64_t iter_row - cdef double mu_i, ga_i, e_i - cdef double al_i, al_i_minus_1, al_i_plus_1 + cdef double mu_i, ga_i, e_i # mu, gamma, e + cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha + cdef double be_i, be_i_minus_1, be_i_plus_1 # beta # === Factorization === From f062b886c3b8218460f651fc742f75a6df305744 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 17:26:08 +0200 Subject: [PATCH 19/62] style: [11] made - signs better readable --- src/pentapy/solver.pyx | 64 +++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 9d45c90..ea15048 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -130,7 +130,7 @@ cdef void c_penta_factorize_algo1( mat_factorized[1, 4] = be_i # Central rows - for iter_row in range(2, mat_n_rows-2): + for iter_row in range(2, mat_n_rows - 2): e_i = mat_flat[4, iter_row] ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i @@ -150,27 +150,27 @@ cdef void c_penta_factorize_algo1( mat_factorized[iter_row, 4] = be_i # Second to last row - e_i = mat_flat[4, mat_n_rows-2] - ga_i = mat_flat[3, mat_n_rows-2] - al_i_minus_1 * e_i - mu_i = mat_flat[2, mat_n_rows-2] - 
be_i_minus_1 * e_i - al_i * ga_i - al_i_plus_1 = (mat_flat[1, mat_n_rows-2] - be_i * ga_i) / mu_i + e_i = mat_flat[4, mat_n_rows - 2] + ga_i = mat_flat[3, mat_n_rows - 2] - al_i_minus_1 * e_i + mu_i = mat_flat[2, mat_n_rows - 2] - be_i_minus_1 * e_i - al_i * ga_i + al_i_plus_1 = (mat_flat[1, mat_n_rows - 2] - be_i * ga_i) / mu_i - mat_factorized[mat_n_rows-2, 0] = e_i - mat_factorized[mat_n_rows-2, 1] = mu_i - mat_factorized[mat_n_rows-2, 2] = ga_i - mat_factorized[mat_n_rows-2, 3] = al_i_plus_1 - mat_factorized[mat_n_rows-2, 4] = 0.0 + mat_factorized[mat_n_rows - 2, 0] = e_i + mat_factorized[mat_n_rows - 2, 1] = mu_i + mat_factorized[mat_n_rows - 2, 2] = ga_i + mat_factorized[mat_n_rows - 2, 3] = al_i_plus_1 + mat_factorized[mat_n_rows - 2, 4] = 0.0 # Last Row - e_i = mat_flat[4, mat_n_rows-1] - ga_i = mat_flat[3, mat_n_rows-1] - al_i * e_i - mu_i = mat_flat[2, mat_n_rows-1] - be_i * e_i - al_i_plus_1 * ga_i + e_i = mat_flat[4, mat_n_rows - 1] + ga_i = mat_flat[3, mat_n_rows - 1] - al_i * e_i + mu_i = mat_flat[2, mat_n_rows - 1] - be_i * e_i - al_i_plus_1 * ga_i - mat_factorized[mat_n_rows-1, 0] = e_i - mat_factorized[mat_n_rows-1, 1] = mu_i - mat_factorized[mat_n_rows-1, 2] = ga_i - mat_factorized[mat_n_rows-1, 3] = 0.0 - mat_factorized[mat_n_rows-1, 4] = 0.0 + mat_factorized[mat_n_rows - 1, 0] = e_i + mat_factorized[mat_n_rows - 1, 1] = mu_i + mat_factorized[mat_n_rows - 1, 2] = ga_i + mat_factorized[mat_n_rows - 1, 3] = 0.0 + mat_factorized[mat_n_rows - 1, 4] = 0.0 return @@ -207,7 +207,7 @@ cdef void c_solve_penta_from_factorize_algo_1( result_view[1] = ze_i # Central rows - for iter_row in range(2, mat_n_rows-2): + for iter_row in range(2, mat_n_rows - 2): ze_i_plus_1 = ( rhs_single[iter_row] - ze_i_minus_1 * mat_factorized[iter_row, 0] @@ -219,30 +219,30 @@ cdef void c_solve_penta_from_factorize_algo_1( # Second to last row ze_i_plus_1 = ( - rhs_single[mat_n_rows-2] - - ze_i_minus_1 * mat_factorized[mat_n_rows-2, 0] - - ze_i * mat_factorized[mat_n_rows-2, 2] 
- ) / mat_factorized[mat_n_rows-2, 1] + rhs_single[mat_n_rows - 2] + - ze_i_minus_1 * mat_factorized[mat_n_rows - 2, 0] + - ze_i * mat_factorized[mat_n_rows - 2, 2] + ) / mat_factorized[mat_n_rows - 2, 1] ze_i_minus_1 = ze_i ze_i = ze_i_plus_1 - result_view[mat_n_rows-2] = ze_i_plus_1 + result_view[mat_n_rows - 2] = ze_i_plus_1 # Last row ze_i_plus_1 = ( - rhs_single[mat_n_rows-1] - - ze_i_minus_1 * mat_factorized[mat_n_rows-1, 0] - - ze_i * mat_factorized[mat_n_rows-1, 2] - ) / mat_factorized[mat_n_rows-1, 1] - result_view[mat_n_rows-1] = ze_i_plus_1 + rhs_single[mat_n_rows - 1] + - ze_i_minus_1 * mat_factorized[mat_n_rows - 1, 0] + - ze_i * mat_factorized[mat_n_rows - 1, 2] + ) / mat_factorized[mat_n_rows - 1, 1] + result_view[mat_n_rows - 1] = ze_i_plus_1 # === Backward substitution === # The solution vector is calculated by backward substitution that overwrites the # right-hand side vector with the solution vector - ze_i -= mat_factorized[mat_n_rows-2, 3] * ze_i_plus_1 - result_view[mat_n_rows-2] = ze_i + ze_i -= mat_factorized[mat_n_rows - 2, 3] * ze_i_plus_1 + result_view[mat_n_rows - 2] = ze_i - for iter_row in range(mat_n_rows-3, -1, -1): + for iter_row in range(mat_n_rows - 3, -1, -1): result_view[iter_row] -= ( mat_factorized[iter_row, 3] * ze_i + mat_factorized[iter_row, 4] * ze_i_plus_1 From 15c787e19c2396f18eb16d4a7717d5739b516632 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 18:56:44 +0200 Subject: [PATCH 20/62] doc/refactor: [11] improved docs and comments of algorithm I; removed `uint` --- src/pentapy/solver.pyx | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index ea15048..451eee9 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -11,7 +11,7 @@ implemented in Cython. 
import numpy as np cimport numpy as np -from libc.stdint cimport int64_t, uint64_t +from libc.stdint cimport int64_t # === Main Python Interface === @@ -40,12 +40,16 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): """ - cdef uint64_t mat_n_rows = mat_flat.shape[1] - cdef uint64_t rhs_n_cols = rhs.shape[1] - cdef uint64_t iter_col + # === Variable declarations === + + cdef int64_t mat_n_rows = mat_flat.shape[1] + cdef int64_t rhs_n_cols = rhs.shape[1] + cdef int64_t iter_col cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols)) cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5)) + # === Solving the system of equations === + # first, the matrix is factorized c_penta_factorize_algo1( mat_flat, @@ -67,7 +71,7 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): cdef void c_penta_factorize_algo1( double[:, :] mat_flat, - uint64_t mat_n_rows, + int64_t mat_n_rows, double[::, ::1] mat_factorized, ): """ @@ -82,14 +86,14 @@ cdef void c_penta_factorize_algo1( They are overwriting the memoryview ``mat_factorized`` as follows: ```bash - [[ * mu_0 * al_0 be_0 ] - [ * mu_1 ga_1 al_1 be_1 ] - [ e_2 mu_2 ga_2 al_2 be_2 ] + [[ * mu_0 * al_0 be_0 ] + [ * mu_1 ga_1 al_1 be_1 ] + [ e_2 mu_2 ga_2 al_2 be_2 ] ... [ e_i mu_i ga_i al_i be_i ] - ... - [ e_{n-2} mu_{n-2} ga_{n-2} al_{n-2} * ] - [ e_{n-1} mu_{n-1} ga_{n-1} * * ]] + [ e_{n-3} mu_{n-3} ga_{n-3} al_{n-3} be_{n-3} ] ... 
+ [ e_{n-2} mu_{n-2} ga_{n-2} al_{n-2} * ] + [ e_{n-1} mu_{n-1} ga_{n-1} * * ]] ``` where the entries marked with ``*`` are not used by design, but overwritten with @@ -99,7 +103,7 @@ cdef void c_penta_factorize_algo1( # === Variable declarations === - cdef uint64_t iter_row + cdef int64_t iter_row cdef double mu_i, ga_i, e_i # mu, gamma, e cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha cdef double be_i, be_i_minus_1, be_i_plus_1 # beta @@ -176,7 +180,7 @@ cdef void c_penta_factorize_algo1( cdef void c_solve_penta_from_factorize_algo_1( - uint64_t mat_n_rows, + int64_t mat_n_rows, double[::, ::1] mat_factorized, double[::] rhs_single, double[::] result_view, From ef7ec51a6eceda76a54c39b83b912baeafe2f4bf Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 19:45:01 +0200 Subject: [PATCH 21/62] wip: [11] restructured chaotic alias comparisons --- src/pentapy/_models.py | 49 +++++++++++++++++ src/pentapy/core.py | 122 ++++++++++++++++++++++++----------------- 2 files changed, 121 insertions(+), 50 deletions(-) create mode 100644 src/pentapy/_models.py diff --git a/src/pentapy/_models.py b/src/pentapy/_models.py new file mode 100644 index 0000000..c75eb8c --- /dev/null +++ b/src/pentapy/_models.py @@ -0,0 +1,49 @@ +""" +Auxiliary models for the pentapy package. 
+ +""" + +# === Imports === + +from enum import IntEnum +from typing import Dict + +# === Models === + + +class PentaSolverAliases(IntEnum): + """ + Defines all available solver aliases for pentadiagonal systems, namely + + - ``PTRANS_I``: The PTRANS-I algorithm + - ``PTRANS_II``: The PTRANS-II algorithm + - ``LAPACK``: Scipy's LAPACK solver :func:`scipy.linalg.solve_banded` + - ``SUPER_LU``: Scipy's SuperLU solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` + - ``UMFPACK``: Scipy's UMFpack solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)` + + """ # noqa: E501 + + PTRANS_I = 1 + PTRANS_II = 2 + LAPACK = 3 + SUPER_LU = 4 + UMFPACK = 5 + + +# === Constants === + +_SOLVER_ALIAS_CONVERSIONS: Dict[str, PentaSolverAliases] = { + "1": PentaSolverAliases.PTRANS_I, + "ptrans-i": PentaSolverAliases.PTRANS_I, + "2": PentaSolverAliases.PTRANS_II, + "ptrans-ii": PentaSolverAliases.PTRANS_II, + "3": PentaSolverAliases.LAPACK, + "lapack": PentaSolverAliases.LAPACK, + "solve_banded": PentaSolverAliases.LAPACK, + "4": PentaSolverAliases.SUPER_LU, + "spsolve": PentaSolverAliases.SUPER_LU, + "5": PentaSolverAliases.UMFPACK, + "spsolve_umf": PentaSolverAliases.UMFPACK, + "umf": PentaSolverAliases.UMFPACK, + "umf_pack": PentaSolverAliases.UMFPACK, +} diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 2393122..ccf0c8f 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -1,15 +1,48 @@ """The core module of pentapy.""" # pylint: disable=C0103, C0415, R0911, E0611 + +# === Imports === + import warnings +from typing import Literal import numpy as np +from pentapy import _models as pmodels +from pentapy import solver as psolver # type: ignore from pentapy import tools as ptools -from pentapy.solver import penta_solver1, penta_solver2 - -def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): +# === Solver === + + +def solve( + mat: np.ndarray, + rhs: np.ndarray, + is_flat: bool = False, + index_row_wise: bool = True, + solver: 
Literal[ + 1, + "1", + "PTRANS-I", + "ptrans-i", + 2, + "2", + "PTRANS-II", + "ptrans-ii", + 3, + "3", + "lapack", + 4, + "4", + "spsolve", + 5, + "5", + "spsolve_umf", + "umf", + "umf_pack", + ] = 1, +) -> np.ndarray: """ Solver for a pentadiagonal system. @@ -39,35 +72,39 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): Parameters ---------- - mat : :class:`numpy.ndarray` - The Matrix or the flattened Version of the pentadiagonal matrix. - rhs : :class:`numpy.ndarray` - The right hand side of the equation system. - is_flat : :class:`bool`, optional + mat : :class:`numpy.ndarray` of shape (m, m) or (5, m) + The full or flattened version of the pentadiagonal matrix. + rhs : :class:`numpy.ndarray` of shape (m,) or (m, n) + The right hand side(s) of the equation system. Its shape is preserved. + is_flat : :class:`bool`, default=False State if the matrix is already flattend. Default: ``False`` - index_row_wise : :class:`bool`, optional + index_row_wise : :class:`bool`, default=True State if the flattend matrix is row-wise flattend. Default: ``True`` - solver : :class:`int` or :class:`str`, optional + solver : :class:`int` or :class:`str`, default=1 Which solver should be used. The following are provided: - * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm + * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm (default) * ``[2, "2", "PTRANS-II"]`` : The PTRANS-II algorithm - * ``[3, "3", "lapack", "solve_banded"]`` : - scipy.linalg.solve_banded - * ``[4, "4", "spsolve"]`` : - The scipy sparse solver without umf_pack - * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : - The scipy sparse solver with umf_pack + * ``[3, "3", "lapack", "solve_banded"]`` : :func:`scipy.linalg.solve_banded` + * ``[4, "4", "spsolve"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` + * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)` - Default: ``1`` + Strings are not case-sensitive.
Returns ------- - result : :class:`numpy.ndarray` - Solution of the equation system + result : :class:`numpy.ndarray` of shape (m,) or (m, n) + Solution of the equation system with the same shape as ``rhs``. + """ - if solver in [1, "1", "PTRANS-I"]: + # first, the solver is converted to the internal name to avoid confusion + solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()] + + if solver_inter in { + pmodels.PentaSolverAliases.PTRANS_I, + pmodels.PentaSolverAliases.PTRANS_II, + }: if is_flat and index_row_wise: mat_flat = np.asarray(mat, dtype=np.double) ptools._check_penta(mat_flat) @@ -97,35 +134,23 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): rhs = rhs[:, np.newaxis] try: + solver_func = ( + psolver.penta_solver1 + if solver_inter == pmodels.PentaSolverAliases.PTRANS_I + else psolver.penta_solver2 + ) + # if there was only a 1D right-hand side, the result has to be flattened if single_rhs: - return penta_solver1(mat_flat, rhs).ravel() + return solver_func(mat_flat, rhs).ravel() - return penta_solver1(mat_flat, rhs) + return solver_func(mat_flat, rhs) except ZeroDivisionError: warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") return np.full(shape=rhs_og_shape, fill_value=np.nan) - elif solver in [2, "2", "PTRANS-II"]: - if is_flat and index_row_wise: - mat_flat = np.asarray(mat, dtype=np.double) - ptools._check_penta(mat_flat) - elif is_flat: - mat_flat = np.array(mat, dtype=np.double) - ptools._check_penta(mat_flat) - ptools.shift_banded(mat_flat, copy=False) - else: - mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) - - rhs = np.asarray(rhs, dtype=np.double) - - try: - return penta_solver2(mat_flat, rhs) - except ZeroDivisionError: - warnings.warn("pentapy: PTRANS-II not suitable for input-matrix.") - return np.full_like(rhs, np.nan) - elif solver in [3, "3", "lapack", "solve_banded"]: # pragma: no cover + elif solver_inter == pmodels.PentaSolverAliases.LAPACK: # pragma: no cover 
try: from scipy.linalg import solve_banded except ImportError as imp_err: # pragma: no cover @@ -140,7 +165,8 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): else: mat_flat = ptools.create_banded(mat) return solve_banded((2, 2), mat_flat, rhs) - elif solver in [4, "4", "spsolve"]: # pragma: no cover + + elif solver_inter == pmodels.PentaSolverAliases.SUPER_LU: # pragma: no cover try: from scipy import sparse as sps from scipy.sparse.linalg import spsolve @@ -158,13 +184,8 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): size = mat_flat.shape[1] M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") return spsolve(M, rhs, use_umfpack=False) - elif solver in [ - 5, - "5", - "spsolve_umf", - "umf", - "umf_pack", - ]: # pragma: no cover + + elif solver_inter == pmodels.PentaSolverAliases.UMFPACK: # pragma: no cover try: from scipy import sparse as sps from scipy.sparse.linalg import spsolve @@ -182,6 +203,7 @@ def solve(mat, rhs, is_flat=False, index_row_wise=True, solver=1): size = mat_flat.shape[1] M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") return spsolve(M, rhs, use_umfpack=True) + else: # pragma: no cover msg = f"pentapy.solve: unknown solver ({solver})" raise ValueError(msg) From 94d110ce03f676fe9ba6643d1f5ef69788cf715b Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 20:15:26 +0200 Subject: [PATCH 22/62] feat: [11] finalized multiple right-hand side support (serial) on Cython level; fixed typos --- src/pentapy/solver.pxd | 4 +- src/pentapy/solver.pyx | 262 ++++++++++++++++++++++++++++++++++------- 2 files changed, 222 insertions(+), 44 deletions(-) diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index 05d249f..b16f8a0 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -1,4 +1,4 @@ # cython: language_level=3 -cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs) +cdef double[::, ::] c_penta_solver1(double[::, ::] 
mat_flat, double[::, ::] rhs) -cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs) +cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 451eee9..1494a25 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=True, wraparound=False, cdivision=False +# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=False """ This is a solver linear equation systems with a penta-diagonal matrix, @@ -17,18 +17,18 @@ from libc.stdint cimport int64_t # === Main Python Interface === -def penta_solver1(double[:, :] mat_flat, double[:, :] rhs): +def penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs): return np.asarray(c_penta_solver1(mat_flat, rhs)) -def penta_solver2(double[:, :] mat_flat, double[:] rhs): +def penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs): return np.asarray(c_penta_solver2(mat_flat, rhs)) # === Solver Algorithm 1 === -cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): +cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs): """ Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and the right-hand side ``b`` by @@ -70,7 +70,7 @@ cdef double[:, :] c_penta_solver1(double[:, :] mat_flat, double[:, :] rhs): cdef void c_penta_factorize_algo1( - double[:, :] mat_flat, + double[::, ::] mat_flat, int64_t mat_n_rows, double[::, ::1] mat_factorized, ): @@ -260,53 +260,231 @@ cdef void c_solve_penta_from_factorize_algo_1( # === Solver Algorithm 2 === -cdef double[:] c_penta_solver2(double[:, :] mat_flat, double[:] rhs): +cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and + the right-hand side ``b`` by + + - factorizing the matrix ``A`` into auxiliary coefficients and 
a unit lower + triangular matrix ``L`` + - transforming the right-hand side into a vector ``omega`` + - solving the system of equations ``Lx = omega`` by forward substitution + + """ + + # Variable declarations + + cdef int64_t mat_n_rows = mat_flat.shape[1] + cdef int64_t rhs_n_cols = rhs.shape[1] + cdef int64_t iter_col + cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols)) + cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5)) + + # first, the matrix is factorized + c_penta_factorize_algo2( + mat_flat, + mat_n_rows, + mat_factorized, + ) - cdef int mat_j = mat_flat.shape[1] + # then, all the right-hand sides are solved + for iter_col in range(rhs_n_cols): + c_solve_penta_from_factorize_algo_2( + mat_n_rows, + mat_factorized, + rhs[::, iter_col], + result[::, iter_col], + ) - cdef double[:] result = np.zeros(mat_j) + return result - cdef double[:] ps = np.zeros(mat_j) # psi - cdef double[:] si = np.zeros(mat_j) # sigma - cdef double[:] ph = np.zeros(mat_j) # phi - cdef double[:] ro = np.zeros(mat_j) # rho - cdef double[:] we = np.zeros(mat_j) # w +cdef void c_penta_factorize_algo2( + double[::, ::] mat_flat, + int64_t mat_n_rows, + double[::, ::1] mat_factorized, +): + """ + Factorizes the pentadiagonal matrix ``A`` into - cdef int i + - auxiliary coefficients ``psi`` (``ps``), ``rho`` and ``b`` for the transformation + of the right-hand side + - a unit lower triangular matrix with the main diagonals ``phi`` and ``sigma`` + (``si``) for the following forward substitution. Its unit main diagonal is + implicit.
- ps[mat_j-1] = mat_flat[2, mat_j-1] - si[mat_j-1] = mat_flat[3, mat_j-1] / ps[mat_j-1] - ph[mat_j-1] = mat_flat[4, mat_j-1] / ps[mat_j-1] - we[mat_j-1] = rhs[mat_j-1] / ps[mat_j-1] + They are overwriting the memoryview ``mat_factorized`` as follows: - ro[mat_j-2] = mat_flat[1, mat_j-2] - ps[mat_j-2] = mat_flat[2, mat_j-2] - si[mat_j-1] * ro[mat_j-2] - si[mat_j-2] = (mat_flat[3, mat_j-2] - ph[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2] - ph[mat_j-2] = mat_flat[4, mat_j-2] / ps[mat_j-2] - we[mat_j-2] = (rhs[mat_j-2] - we[mat_j-1] * ro[mat_j-2]) / ps[mat_j-2] + ```bash + [[ * * ps_0 rho_0 b_0 ] + [ * si_1 ps_1 rho_1 b_1 ] + [ phi_2 si_2 ps_2 rho_2 b_2 ] + ... + [ phi_i si_i ps_i rho_i b_i ] + ... + [ phi_{n-3} si_{n-3} ps_{n-3} rho_{n-3} b_{n-3} ] + [ phi_{n-2} si_{n-2} ps_{n-2} rho_{n-2} * ] + [ phi_{n-1} si_{n-1} ps_{n-1} * * ]] + ``` - for i in range(mat_j-3, 1, -1): - ro[i] = mat_flat[1, i] - si[i+2] * mat_flat[0, i] - ps[i] = mat_flat[2, i] - ph[i+2] * mat_flat[0, i] - si[i+1] * ro[i] - si[i] = (mat_flat[3, i] - ph[i+1] * ro[i]) / ps[i] - ph[i] = mat_flat[4, i] / ps[i] - we[i] = (rhs[i] - we[i+2] * mat_flat[0, i] - we[i+1] * ro[i]) / ps[i] + where the entries marked with ``*`` are not used by design, but overwritten with + zeros.
- ro[1] = mat_flat[1, 1] - si[3] * mat_flat[0, 1] - ps[1] = mat_flat[2, 1] - ph[3] * mat_flat[0, 1] - si[2] * ro[1] - si[1] = (mat_flat[3, 1] - ph[2] * ro[1]) / ps[1] + """ - ro[0] = mat_flat[1, 0] - si[2] * mat_flat[0, 0] - ps[0] = mat_flat[2, 0] - ph[2] * mat_flat[0, 0] - si[1] * ro[0] + # === Variable declarations === - we[1] = (rhs[1] - we[3] * mat_flat[0, 1] - we[2] * ro[1]) / ps[1] - we[0] = (rhs[0] - we[2] * mat_flat[0, 0] - we[1] * ro[0]) / ps[0] + cdef int64_t iter_row + cdef double ps_i, rho_i # psi, rho + cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma + cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi - # Foreward substitution - result[0] = we[0] - result[1] = we[1] - si[1] * result[0] + # === Factorization === - for i in range(2, mat_j): - result[i] = we[i] - si[i] * result[i-1] - ph[i] * result[i-2] + # First row + ps_i = mat_flat[2, mat_n_rows - 1] + si_i_plus_1 = mat_flat[3, mat_n_rows - 1] / ps_i + phi_i_plus_1 = mat_flat[4, mat_n_rows - 1] / ps_i - return result + mat_factorized[mat_n_rows - 1, 0] = phi_i_plus_1 + mat_factorized[mat_n_rows - 1, 1] = si_i_plus_1 + mat_factorized[mat_n_rows - 1, 2] = ps_i + mat_factorized[mat_n_rows - 1, 3] = 0.0 + mat_factorized[mat_n_rows - 1, 4] = 0.0 + + # Second row + rho_i = mat_flat[1, mat_n_rows-2] + ps_i = mat_flat[2, mat_n_rows-2] - si_i_plus_1 * rho_i + si_i = (mat_flat[3, mat_n_rows-2] - phi_i_plus_1 * rho_i) / ps_i + phi_i = mat_flat[4, mat_n_rows-2] / ps_i + + mat_factorized[mat_n_rows - 2, 0] = phi_i + mat_factorized[mat_n_rows - 2, 1] = si_i + mat_factorized[mat_n_rows - 2, 2] = ps_i + mat_factorized[mat_n_rows - 2, 3] = rho_i + mat_factorized[mat_n_rows - 2, 4] = 0.0 + + # Central rows + for iter_row in range(mat_n_rows-3, 1, -1): + b_i = mat_flat[0, iter_row] + rho_i = mat_flat[1, iter_row] - si_i_plus_1 * b_i + ps_i = mat_flat[2, iter_row] - phi_i_plus_1 * b_i - si_i * rho_i + si_i_minus_1 = (mat_flat[3, iter_row] - phi_i * rho_i) / ps_i + si_i_plus_1 = si_i + si_i = si_i_minus_1 + 
phi_i_minus_1 = mat_flat[4, iter_row] / ps_i + phi_i_plus_1 = phi_i + phi_i = phi_i_minus_1 + + mat_factorized[iter_row, 0] = phi_i + mat_factorized[iter_row, 1] = si_i + mat_factorized[iter_row, 2] = ps_i + mat_factorized[iter_row, 3] = rho_i + mat_factorized[iter_row, 4] = b_i + + # Second to last row + b_i = mat_flat[0, 1] + rho_i = mat_flat[1, 1] - si_i_plus_1 * b_i + ps_i = mat_flat[2, 1] - phi_i_plus_1 * b_i - si_i * rho_i + si_i_minus_1 = (mat_flat[3, 1] - phi_i * rho_i) / ps_i + si_i_plus_1 = si_i + si_i = si_i_minus_1 + + mat_factorized[1, 0] = 0.0 + mat_factorized[1, 1] = si_i + mat_factorized[1, 2] = ps_i + mat_factorized[1, 3] = rho_i + mat_factorized[1, 4] = b_i + + # Last row + b_i = mat_flat[0, 0] + rho_i = mat_flat[1, 0] - si_i_plus_1 * b_i + ps_i = mat_flat[2, 0] - phi_i * b_i - si_i * rho_i + + mat_factorized[0, 0] = 0.0 + mat_factorized[0, 1] = 0.0 + mat_factorized[0, 2] = ps_i + mat_factorized[0, 3] = rho_i + mat_factorized[0, 4] = b_i + + return + + +cdef void c_solve_penta_from_factorize_algo_2( + int64_t mat_n_rows, + double[::, ::1] mat_factorized, + double[::] rhs_single, + double[::] result_view, +): + """ + Solves the pentadiagonal system of equations ``Ax = b`` with the factorized + unit lower triangular matrix ``L`` and the right-hand side ``b``. + It overwrites the right-hand side ``b`` first with the transformed vector ``omega`` + and then with the solution vector ``x`` for ``Lx = omega``. 
+ + """ + + # === Variable declarations === + + cdef int64_t iter_row + cdef double om_i, om_i_minus_1, om_i_minus_2 # omega + + # === Transformation === + + # first, the right-hand side is transformed into the vector ``omega`` + # First row + om_i_plus_1 = rhs_single[mat_n_rows-1] / mat_factorized[mat_n_rows - 1, 2] + result_view[mat_n_rows-1] = om_i_plus_1 + + # Second row + om_i = ( + rhs_single[mat_n_rows-2] + - om_i_plus_1 * mat_factorized[mat_n_rows - 2, 3] + ) / mat_factorized[mat_n_rows - 2, 2] + result_view[mat_n_rows-2] = om_i + + # Central rows + for iter_row in range(mat_n_rows-3, 1, -1): + om_i_minus_1 = ( + rhs_single[iter_row] + - om_i_plus_1 * mat_factorized[iter_row, 4] + - om_i * mat_factorized[iter_row, 3] + ) / mat_factorized[iter_row, 2] + om_i_plus_1 = om_i + om_i = om_i_minus_1 + result_view[iter_row] = om_i + + # Second to last row + om_i_minus_1 = ( + rhs_single[1] + - om_i_plus_1 * mat_factorized[1, 4] + - om_i * mat_factorized[1, 3] + ) / mat_factorized[1, 2] + om_i_plus_1 = om_i + om_i = om_i_minus_1 + result_view[1] = om_i + + # Last row + om_i_minus_1 = ( + rhs_single[0] + - om_i_plus_1 * mat_factorized[0, 4] + - om_i * mat_factorized[0, 3] + ) / mat_factorized[0, 2] + result_view[0] = om_i_minus_1 + + # === Forward substitution === + + # The solution vector is calculated by forward substitution that overwrites the + # right-hand side vector with the solution vector + om_i -= mat_factorized[1, 1] * om_i_minus_1 + result_view[1] = om_i + + for iter_row in range(2, mat_n_rows): + result_view[iter_row] = ( + result_view[iter_row] + - mat_factorized[iter_row, 0] * om_i_minus_1 + - mat_factorized[iter_row, 1] * om_i + ) + om_i_minus_1 = om_i + om_i = result_view[iter_row] + + return \ No newline at end of file From ba5945f3e1b8ba10862d6c4e7302aa3e0f4dec00 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 20:16:15 +0200 Subject: [PATCH 23/62] tests: [11] unified test for both pentapy algorithms, made tests a lot more extensive --- 
...t_solver_1.py => test_solvers_internal.py} | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) rename tests/{test_solver_1.py => test_solvers_internal.py} (82%) diff --git a/tests/test_solver_1.py b/tests/test_solvers_internal.py similarity index 82% rename from tests/test_solver_1.py rename to tests/test_solvers_internal.py index 78956f6..2b543cf 100644 --- a/tests/test_solver_1.py +++ b/tests/test_solvers_internal.py @@ -1,5 +1,6 @@ """ -Test suite for testing the pentadiagonal solver based on Algorithm PTRANS-I. +Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or +PTRANS-II. """ @@ -40,13 +41,17 @@ 10_000, 10_001, ] -REF_WARNING = "pentapy: PTRANS-I not suitable for input-matrix." +REF_WARNING_CONTENT = "not suitable for input-matrix." +SOLVER_ALIASES_PTRANS_I = [1, "1", "PTRANS-I", "ptrans-i"] +SOLVER_ALIASES_PTRANS_II = [2, "2", "PTRANS-II", "ptrans-ii"] # === Tests === @pytest.mark.parametrize("induce_error", [False, True]) -@pytest.mark.parametrize("solver_alias", [1]) # "1", "PTRANS-I"]) +@pytest.mark.parametrize( + "solver_alias", SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II +) @pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) @pytest.mark.parametrize("n_rhs", [None, 1, 10]) @pytest.mark.parametrize("n_rows", N_ROWS) @@ -72,17 +77,20 @@ def test_penta_solver1( ill_conditioned=False, ) - # an error is induced by setting the first diagonal element to zero + # an error is induced by setting the first or last diagonal element to zero if induce_error: # the induction of the error is only possible if the matrix does not have # only 3 rows if n_rows == 3: pytest.skip( "Only 3 rows, cannot induce error because this will not go into " - "PTRANS-I, but NumPy" + "PTRANS-I, but NumPy." 
) - mat_full[0, 0] = 0.0 + if solver_alias in SOLVER_ALIASES_PTRANS_I: + mat_full[0, 0] = 0.0 + else: + mat_full[n_rows - 1, n_rows - 1] = 0.0 # the right-hand side is generated np.random.seed(SEED) @@ -119,7 +127,7 @@ def test_penta_solver1( # Case 1: in case of an error, a warning has to be issued and the result has to # be NaN if induce_error: - with pytest.warns(UserWarning, match=REF_WARNING): + with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): sol = pp.solve( mat=mat, rhs=rhs, From 026e8eab05dfe8efffe3d0eff0348db5e2728c13 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 20:24:26 +0200 Subject: [PATCH 24/62] tests: [11] removed superficial unittest --- tests/test_pentapy.py | 115 ------------------------------------------ 1 file changed, 115 deletions(-) delete mode 100755 tests/test_pentapy.py diff --git a/tests/test_pentapy.py b/tests/test_pentapy.py deleted file mode 100755 index 558de5e..0000000 --- a/tests/test_pentapy.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -This is the unittest for pentapy. 
-""" - -import unittest - -# import platform -import warnings - -import numpy as np - -import pentapy as pp - -warnings.simplefilter("always") - - -class TestPentapy(unittest.TestCase): - def setUp(self): - self.seed = 19031977 - self.size = 1000 - self.rand = np.random.RandomState(self.seed) - self.mat = (self.rand.rand(5, self.size) - 0.5) * 1e-5 - self.rhs = self.rand.rand(self.size) * 1e5 - - def test_tools(self): - self.mat_int = np.zeros((100, 100), dtype=int) - # fill bands of pentadiagonal matrix - self.mat_int[pp.diag_indices(100, 0)] = self.rand.randint(1, 1000, size=100) - self.mat_int[pp.diag_indices(100, 1)] = self.rand.randint(1, 1000, size=99) - self.mat_int[pp.diag_indices(100, 2)] = self.rand.randint(1, 1000, size=98) - self.mat_int[pp.diag_indices(100, -1)] = self.rand.randint(1, 1000, size=99) - self.mat_int[pp.diag_indices(100, -2)] = self.rand.randint(1, 1000, size=98) - # create banded - self.mat_int_col = pp.create_banded(self.mat_int) - self.mat_int_row = pp.create_banded(self.mat_int, col_wise=False) - # create full - self.mat_int_col_ful = pp.create_full(self.mat_int_col, col_wise=True) - self.mat_int_row_ful = pp.create_full(self.mat_int_row, col_wise=False) - # shifting - self.mat_shift_cr = pp.shift_banded(self.mat_int_col) - self.mat_shift_rc = pp.shift_banded(self.mat_int_row, col_to_row=False) - # in place shifting - self.mat_int_col_ip = pp.create_banded(self.mat_int) - self.mat_int_row_ip = pp.create_banded(self.mat_int, col_wise=False) - pp.shift_banded(self.mat_int_col_ip, copy=False) - pp.shift_banded(self.mat_int_row_ip, copy=False, col_to_row=False) - # checking - self.assertEqual(np.sum(self.mat_int > 0), 494) - self.assertTrue(np.array_equal(self.mat_int_col, self.mat_shift_rc)) - self.assertTrue(np.array_equal(self.mat_int_row, self.mat_shift_cr)) - self.assertTrue(np.array_equal(self.mat_int_col, self.mat_int_row_ip)) - self.assertTrue(np.array_equal(self.mat_int_row, self.mat_int_col_ip)) - 
self.assertTrue(np.array_equal(self.mat_int, self.mat_int_col_ful)) - self.assertTrue(np.array_equal(self.mat_int, self.mat_int_row_ful)) - - def test_solve1(self): - self.mat_col = pp.shift_banded(self.mat, col_to_row=False) - self.mat_ful = pp.create_full(self.mat, col_wise=False) - - sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=1) - sol_col = pp.solve( - self.mat_col, - self.rhs, - is_flat=True, - index_row_wise=False, - solver=1, - ) - sol_ful = pp.solve(self.mat_ful, self.rhs, solver=1) - - diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs)) - diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs)) - diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs)) - - diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col))) - self.assertAlmostEqual(diff_row * 1e-5, 0.0) - self.assertAlmostEqual(diff_col * 1e-5, 0.0) - self.assertAlmostEqual(diff_ful * 1e-5, 0.0) - self.assertAlmostEqual(diff_row_col * 1e5, 0.0) - - def test_solve2(self): - self.mat_col = pp.shift_banded(self.mat, col_to_row=False) - self.mat_ful = pp.create_full(self.mat, col_wise=False) - - sol_row = pp.solve(self.mat, self.rhs, is_flat=True, solver=2) - sol_col = pp.solve( - self.mat_col, - self.rhs, - is_flat=True, - index_row_wise=False, - solver=2, - ) - sol_ful = pp.solve(self.mat_ful, self.rhs, solver=2) - - diff_row = np.max(np.abs(np.dot(self.mat_ful, sol_row) - self.rhs)) - diff_col = np.max(np.abs(np.dot(self.mat_ful, sol_col) - self.rhs)) - diff_ful = np.max(np.abs(np.dot(self.mat_ful, sol_ful) - self.rhs)) - - diff_row_col = np.max(np.abs(self.mat_ful - pp.create_full(self.mat_col))) - self.assertAlmostEqual(diff_row * 1e-5, 0.0) - self.assertAlmostEqual(diff_col * 1e-5, 0.0) - self.assertAlmostEqual(diff_ful * 1e-5, 0.0) - self.assertAlmostEqual(diff_row_col * 1e5, 0.0) - - def test_error(self): - self.err_mat = np.array( - [[3, 2, 1, 0], [-3, -2, 7, 1], [3, 2, -1, 5], [0, 1, 2, 3]] - ) - self.err_rhs = np.array([6, 
3, 9, 6]) - sol_2 = pp.solve(self.err_mat, self.err_rhs, is_flat=False, solver=2) - diff_2 = np.max(np.abs(np.dot(self.err_mat, sol_2) - self.err_rhs)) - self.assertAlmostEqual(diff_2, 0.0) - - -if __name__ == "__main__": - unittest.main() From f6fa4cbb28cb18b2d921bc4f6ad3243d539cdad0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 20:25:26 +0200 Subject: [PATCH 25/62] misc: [11] fixed typos --- examples/README.rst | 4 ++-- paper/paper.md | 6 +++--- src/pentapy/core.py | 12 ++++++------ src/pentapy/tools.py | 22 +++++++++++----------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/README.rst b/examples/README.rst index ea7bac4..a1b7ae0 100644 --- a/examples/README.rst +++ b/examples/README.rst @@ -88,7 +88,7 @@ If M is a full matrix, you call the following: X = pp.solve(M, Y) -If M is flattend in row-wise order you have to set the keyword argument ``is_flat=True``: +If M is flattened in row-wise order you have to set the keyword argument ``is_flat=True``: .. code-block:: python @@ -99,7 +99,7 @@ If M is flattend in row-wise order you have to set the keyword argument ``is_fla X = pp.solve(M, Y, is_flat=True) -If you got a col-wise flattend matrix you have to set ``index_row_wise=False``: +If you got a col-wise flattened matrix you have to set ``index_row_wise=False``: .. code-block:: python diff --git a/paper/paper.md b/paper/paper.md index 53c6f04..fa0fdc3 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -51,8 +51,8 @@ $$ Here, $d_i$ are the diagonal entries and $d_i^{(j)}$ represent the $j$-th minor diagonal. -Recently, @askar presented two algorithms to -solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``, +Recently, @askar presented two algorithms to +solve the linear systems of equations for $X$, ``PTRANS-I`` and ``PTRANS-II``, applying first transformation to a triangular matrix and then, respectively, backward and forward substitution. 
``pentapy`` provides Cython [@cython] implementations of these algorithms and a set of tools to convert matrices to row-wise or @@ -73,7 +73,7 @@ The linear algebra solver of NumPy [@numpy] served as a standard reference, whic ``pentapy`` is designed to provide a fast solver for the special case of a pentadiagonal linear system. To the best of the author's knowledge, this package outperforms the current algorithms for solving pentadiagonal systems in Python. -The solver can handle different input formats of the coefficient matrix, i.e., a flattend matrix or a +The solver can handle different input formats of the coefficient matrix, i.e., a flattened matrix or a quadratic matrix. diff --git a/src/pentapy/core.py b/src/pentapy/core.py index ccf0c8f..ad704a6 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -46,8 +46,8 @@ def solve( """ Solver for a pentadiagonal system. - The matrix can be given as a full n x n matrix or as a flattend one. - The flattend matrix can be given in a row-wise flattend form:: + The matrix can be given as a full n x n matrix or as a flattened one. + The flattened matrix can be given in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -55,7 +55,7 @@ def solve( [0 Dlow1[1] Dlow1[2] ... Dlow1[N-2] Dlow1[N-1] Dlow1[N]] [0 0 Dlow2[2] ... Dlow2[N-2] Dlow2[N-2] Dlow2[N]]] - Or a column-wise flattend form:: + Or a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -65,7 +65,7 @@ def solve( Dup1 and Dup2 are the first and second upper minor-diagonals and Dlow1 resp. Dlow2 are the lower ones. - If you provide a column-wise flattend matrix, you have to set:: + If you provide a column-wise flattened matrix, you have to set:: index_row_wise=False @@ -77,9 +77,9 @@ def solve( rhs : :class:`numpy.ndarray` of shape (m,) or (m, n) The right hand side(s) of the equation system. 
Its shape is preserved. is_flat : :class:`bool`, default=False - State if the matrix is already flattend. Default: ``False`` + State if the matrix is already flattened. Default: ``False`` index_row_wise : :class:`bool`, default=True - State if the flattend matrix is row-wise flattend. Default: ``True`` + State if the flattened matrix is row-wise flattened. Default: ``True`` solver : :class:`int` or :class:`str`, default=1 Which solver should be used. The following are provided: diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index ca80d27..4bb0b52 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -52,8 +52,8 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): Either from column-wise to row-wise storage or vice versa. - The Matrix has to be given as a flattend matrix. - Either in a column-wise flattend form:: + The Matrix has to be given as a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -65,7 +65,7 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): col_to_row=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -98,7 +98,7 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): Returns ------- :class:`numpy.ndarray` - Shifted bandend matrix + Shifted banded matrix """ if copy: mat_flat = np.copy(mat) @@ -124,8 +124,8 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): """Create a banded matrix from a given quadratic Matrix. - The Matrix will to be returned as a flattend matrix. - Either in a column-wise flattend form:: + The Matrix will to be returned as a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... 
Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -137,7 +137,7 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): col_wise=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] 0 ] @@ -168,7 +168,7 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): Returns ------- :class:`numpy.ndarray` - Bandend matrix + Banded matrix """ mat = np.asanyarray(mat) if mat.ndim != 2: @@ -202,8 +202,8 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): def create_full(mat, up=2, low=2, col_wise=True): """Create a (n x n) Matrix from a given banded matrix. - The given Matrix has to be a flattend matrix. - Either in a column-wise flattend form:: + The given Matrix has to be a flattened matrix. + Either in a column-wise flattened form:: [[0 0 Dup2[2] ... Dup2[N-2] Dup2[N-1] Dup2[N] ] [0 Dup1[1] Dup1[2] ... Dup1[N-2] Dup1[N-1] Dup1[N] ] @@ -215,7 +215,7 @@ def create_full(mat, up=2, low=2, col_wise=True): col_wise=True - Or in a row-wise flattend form:: + Or in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] [Dup1[0] Dup1[1] Dup1[2] ... 
Dup1[N-2] Dup1[N-1] 0 ] From 9b3e1224b205f50caec5fa136771680d952bab55 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 20:39:43 +0200 Subject: [PATCH 26/62] style/refactor: [11] made core code readable to humans and not only a machine --- src/pentapy/core.py | 55 +++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index ad704a6..4158d94 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -101,6 +101,7 @@ def solve( # first, the solver is converted to the internal name to avoid confusion solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()] + # Case 1: the pentapy solvers if solver_inter in { pmodels.PentaSolverAliases.PTRANS_I, pmodels.PentaSolverAliases.PTRANS_II, @@ -150,29 +151,14 @@ def solve( warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") return np.full(shape=rhs_og_shape, fill_value=np.nan) + # Case 2: LAPACK's banded solver elif solver_inter == pmodels.PentaSolverAliases.LAPACK: # pragma: no cover try: from scipy.linalg import solve_banded except ImportError as imp_err: # pragma: no cover msg = "pentapy.solve: scipy.linalg.solve_banded could not be imported" raise ValueError(msg) from imp_err - if is_flat and index_row_wise: - mat_flat = np.array(mat) - ptools._check_penta(mat_flat) - ptools.shift_banded(mat_flat, col_to_row=False, copy=False) - elif is_flat: - mat_flat = np.asarray(mat) - else: - mat_flat = ptools.create_banded(mat) - return solve_banded((2, 2), mat_flat, rhs) - elif solver_inter == pmodels.PentaSolverAliases.SUPER_LU: # pragma: no cover - try: - from scipy import sparse as sps - from scipy.sparse.linalg import spsolve - except ImportError as imp_err: - msg = "pentapy.solve: scipy.sparse could not be imported" - raise ValueError(msg) from imp_err if is_flat and index_row_wise: mat_flat = np.array(mat) ptools._check_penta(mat_flat) @@ -181,17 +167,27 @@ def solve( mat_flat = 
np.asarray(mat) else: mat_flat = ptools.create_banded(mat) - size = mat_flat.shape[1] - M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") - return spsolve(M, rhs, use_umfpack=False) - elif solver_inter == pmodels.PentaSolverAliases.UMFPACK: # pragma: no cover + # NOTE: since this is a general banded solver, the number of sub- and super- + # diagonals has to be provided + return solve_banded( + l_and_u=(2, 2), + ab=mat_flat, + b=rhs, + ) + + # Case 3: SciPy's sparse solver with or without UMFPACK + elif solver_inter in { + pmodels.PentaSolverAliases.SUPER_LU, + pmodels.PentaSolverAliases.UMFPACK, + }: try: from scipy import sparse as sps from scipy.sparse.linalg import spsolve except ImportError as imp_err: msg = "pentapy.solve: scipy.sparse could not be imported" raise ValueError(msg) from imp_err + if is_flat and index_row_wise: mat_flat = np.array(mat) ptools._check_penta(mat_flat) @@ -200,9 +196,24 @@ def solve( mat_flat = np.asarray(mat) else: mat_flat = ptools.create_banded(mat) + + # the solvers require a sparse left-hand side matrix, so this is created here + # NOTE: the UMFPACK solver will not be triggered for multiple right-hand sides + use_umfpack = solver_inter == pmodels.PentaSolverAliases.UMFPACK size = mat_flat.shape[1] - M = sps.spdiags(mat_flat, [2, 1, 0, -1, -2], size, size, format="csc") - return spsolve(M, rhs, use_umfpack=True) + M = sps.spdiags( + data=mat_flat, + diags=[2, 1, 0, -1, -2], + m=size, + n=size, + format="csc", + ) + + return spsolve( + A=M, + b=rhs, + use_umfpack=use_umfpack, + ) else: # pragma: no cover msg = f"pentapy.solve: unknown solver ({solver})" From 92abbbdaca93083334917c7db42fe63e63ed6c23 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:27:50 +0200 Subject: [PATCH 27/62] feat/refactor: [11] ensured all solvers behave the same in terms of output and warning behaviour; tested it altogether --- src/pentapy/core.py | 32 ++++++--- tests/test_solvers_external.py | 121 
+++++++++++++++++++++++++++++++++ tests/test_solvers_internal.py | 2 +- 3 files changed, 145 insertions(+), 10 deletions(-) create mode 100644 tests/test_solvers_external.py diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 4158d94..c701573 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -142,10 +142,11 @@ def solve( ) # if there was only a 1D right-hand side, the result has to be flattened + sol = solver_func(mat_flat, rhs) if single_rhs: - return solver_func(mat_flat, rhs).ravel() + sol = sol.ravel() - return solver_func(mat_flat, rhs) + return sol except ZeroDivisionError: warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") @@ -170,11 +171,17 @@ def solve( # NOTE: since this is a general banded solver, the number of sub- and super- # diagonals has to be provided - return solve_banded( - l_and_u=(2, 2), - ab=mat_flat, - b=rhs, - ) + # NOTE: LAPACK handles all the reshaping and flattening internally + try: + return solve_banded( + l_and_u=(2, 2), + ab=mat_flat, + b=rhs, + ) + + except np.linalg.LinAlgError: + warnings.warn("pentapy: LAPACK solver encountered singular matrix.") + return np.full(shape=rhs.shape, fill_value=np.nan) # Case 3: SciPy's sparse solver with or without UMFPACK elif solver_inter in { @@ -184,7 +191,7 @@ def solve( try: from scipy import sparse as sps from scipy.sparse.linalg import spsolve - except ImportError as imp_err: + except ImportError as imp_err: # pragma: no cover msg = "pentapy.solve: scipy.sparse could not be imported" raise ValueError(msg) from imp_err @@ -209,12 +216,19 @@ def solve( format="csc", ) - return spsolve( + sol = spsolve( A=M, b=rhs, use_umfpack=use_umfpack, ) + # NOTE: spsolve flattens column-vectors, thus their shape has to be restored + # NOTE: it already fills the result vector with NaNs if the matrix is singular + if rhs.ndim == 2 and 1 in rhs.shape: + sol = sol[::, np.newaxis] + + return sol + else: # pragma: no cover msg = f"pentapy.solve: unknown solver ({solver})" raise 
ValueError(msg) diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py new file mode 100644 index 0000000..075dfb4 --- /dev/null +++ b/tests/test_solvers_external.py @@ -0,0 +1,121 @@ +""" +Test suite for testing the external solvers that can be called via pentapy. The tests +are not exhaustive and only check whether the solvers can be called and return a +solution. + +""" + +# === Imports === + +from typing import Literal + +import numpy as np +import pentapy as pp +import pytest +import util_funcs as uf + +# === Constants === + +SEED = 19_031_977 +N_ROWS = [ + 3, + 4, + 5, + 10, + 11, + 25, + 26, + 50, + 51, +] +REF_WARNING_CONTENT = "singular" +SOLVER_ALIASES_LAPACK = [3, "3", "lapack", "LaPaCk"] +SOLVER_ALIASES_SPSOLVE = [4, "4", "spsolve", "SpSoLvE"] + +# === Tests === + + +@pytest.mark.parametrize("induce_error", [False, True]) +@pytest.mark.parametrize("solver_alias", SOLVER_ALIASES_LAPACK + SOLVER_ALIASES_SPSOLVE) +@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) +@pytest.mark.parametrize("n_rhs", [None, 1, 10]) +@pytest.mark.parametrize("n_rows", N_ROWS) +def test_external_solvers( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[1, "1", "PTRANS-I"], + induce_error: bool, +) -> None: + """ + Tests the external bindings for solving pentadiagonal systems starting from + different input layouts, number of right-hand sides, number of rows, and when an + error is induced by a zero matrix. + It has to be ensured that the edge case of ``n_rows = 3`` is also covered. 
+ + """ + + # first, a random pentadiagonal matrix is generated + mat_full = np.zeros(shape=(n_rows, n_rows)) + if not induce_error: + mat_full[::, ::] = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # the right-hand side is generated + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows, n_rhs) + result_shape = (n_rows, n_rhs) + else: + rhs = np.random.rand(n_rows) + result_shape = (n_rows,) + + # the matrix is converted to the desired layout + if input_layout == "full": + mat = mat_full + kwargs = dict(is_flat=False) + + elif input_layout == "banded_row_wise": + mat = pp.create_banded(mat_full, col_wise=False) + kwargs = dict( + is_flat=True, + index_row_wise=True, + ) + + elif input_layout == "banded_col_wise": + mat = pp.create_banded(mat_full, col_wise=True) + kwargs = dict( + is_flat=True, + index_row_wise=False, + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + # the solution is computed + # Case 1: in case of an error, a warning has to be issued and the result has to + # be NaN + if induce_error: + with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape + assert np.isnan(sol).all() + + return + + # Case 2: in case of no error, the solution can be computed without any issues + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + **kwargs, + ) + assert sol.shape == result_shape diff --git a/tests/test_solvers_internal.py b/tests/test_solvers_internal.py index 2b543cf..cdc55fa 100644 --- a/tests/test_solvers_internal.py +++ b/tests/test_solvers_internal.py @@ -55,7 +55,7 @@ @pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) @pytest.mark.parametrize("n_rhs", [None, 1, 10]) @pytest.mark.parametrize("n_rows", N_ROWS) -def test_penta_solver1( +def 
test_pentapy_solvers( n_rows: int, n_rhs: int, input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], From fb0cf2a342240e3b637774f2f3a8661424d1d515 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:28:23 +0200 Subject: [PATCH 28/62] fix: [11] fixed coverage typo --- src/pentapy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pentapy/__init__.py b/src/pentapy/__init__.py index 655c428..3705064 100644 --- a/src/pentapy/__init__.py +++ b/src/pentapy/__init__.py @@ -44,7 +44,7 @@ try: from pentapy._version import __version__ -except ImportError: # pragma: nocover +except ImportError: # pragma: no cover # package is not installed __version__ = "0.0.0.dev0" From c2e8032007a172528adb596f667eb44aedc89c3b Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:28:50 +0200 Subject: [PATCH 29/62] lint: [11] linted cython files --- src/pentapy/solver.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 1494a25..2da01db 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -104,9 +104,9 @@ cdef void c_penta_factorize_algo1( # === Variable declarations === cdef int64_t iter_row - cdef double mu_i, ga_i, e_i # mu, gamma, e - cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha - cdef double be_i, be_i_minus_1, be_i_plus_1 # beta + cdef double mu_i, ga_i, e_i # mu, gamma, e + cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha + cdef double be_i, be_i_minus_1, be_i_plus_1 # beta # === Factorization === @@ -334,9 +334,9 @@ cdef void c_penta_factorize_algo2( # === Variable declarations === cdef int64_t iter_row - cdef double ps_i, rho_i # psi, rho - cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma - cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi + cdef double ps_i, rho_i # psi, rho + cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma + cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi # === 
Factorization === @@ -426,7 +426,7 @@ cdef void c_solve_penta_from_factorize_algo_2( # === Variable declarations === cdef int64_t iter_row - cdef double om_i, om_i_minus_1, om_i_minus_2 # omega + cdef double om_i, om_i_minus_1, om_i_plus_1 # omega # === Transformation === @@ -487,4 +487,4 @@ cdef void c_solve_penta_from_factorize_algo_2( om_i_minus_1 = om_i om_i = result_view[iter_row] - return \ No newline at end of file + return From b4190f8144a24a122cf79c6724bb5ef938278dae Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:31:46 +0200 Subject: [PATCH 30/62] package: [11] made requirements file-based and dynamic to facilitate development without compromising on build --- pyproject.toml | 33 ++++++--------------------------- requirements/all.txt | 2 ++ requirements/base.txt | 1 + requirements/check.txt | 4 ++++ requirements/doc.txt | 8 ++++++++ requirements/scipy.txt | 1 + requirements/test.txt | 4 ++++ requirements/umfpack | 1 + 8 files changed, 27 insertions(+), 27 deletions(-) create mode 100644 requirements/all.txt create mode 100644 requirements/base.txt create mode 100644 requirements/check.txt create mode 100644 requirements/doc.txt create mode 100644 requirements/scipy.txt create mode 100644 requirements/test.txt create mode 100644 requirements/umfpack diff --git a/pyproject.toml b/pyproject.toml index 4c400f8..b26ae54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ name = "pentapy" authors = [{name = "Sebastian Müller", email = "info@geostat-framework.org"}] readme = "README.md" license = {text = "MIT"} -dynamic = ["version"] +dynamic = ["version", "dependencies", "optional-dependencies"] description = "pentapy: A toolbox for pentadiagonal matrizes." 
classifiers = [ "Development Status :: 5 - Production/Stable", @@ -35,32 +35,10 @@ classifiers = [ "Topic :: Scientific/Engineering", "Topic :: Utilities", ] -dependencies = ["numpy>=1.20.0"] -[project.optional-dependencies] -scipy = ["scipy"] -umfpack = ["scikit-umfpack"] -all = [ - "scipy", - "scikit-umfpack", -] -doc = [ - "m2r2>=0.2.8", - "scipy>=1.1.0", - "matplotlib>=3", - "perfplot<0.9", - "numpydoc>=1.1", - "sphinx>=7", - "sphinx-gallery>=0.8", - "sphinx-rtd-theme>=2", -] -test = ["pytest-cov>=3"] -check = [ - "black>=24,<25", - "isort[colors]", - "pylint", - "cython-lint", -] +[tool.setuptools.dynamic] +dependencies = {file = ["requirements/base.txt"]} +optional-dependencies = {scipy = {file = ["requirements/scipy.txt"]}, umfpack = {file = ["requirements/umfpack.txt"]}, all = {file = ["requirements/all.txt"]}, doc = {file = ["requirements/doc.txt"]}, test = {file = ["requirements/test.txt"]}, check = {file = ["requirements/check.txt"]}} [project.urls] Homepage = "https://github.com/GeoStat-Framework/pentapy" @@ -103,7 +81,8 @@ max-line-length = 120 "*examples*", "*tests*", "*paper*", - "pentapy/src/pentapy/_version.py", + "src/pentapy/_version.py", + "src/pentapy/__init__.py", ] [tool.coverage.report] diff --git a/requirements/all.txt b/requirements/all.txt new file mode 100644 index 0000000..be8d325 --- /dev/null +++ b/requirements/all.txt @@ -0,0 +1,2 @@ +scikit-umfpack +scipy \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt new file mode 100644 index 0000000..19b3787 --- /dev/null +++ b/requirements/base.txt @@ -0,0 +1 @@ +numpy>=1.20.0 \ No newline at end of file diff --git a/requirements/check.txt b/requirements/check.txt new file mode 100644 index 0000000..4af46fc --- /dev/null +++ b/requirements/check.txt @@ -0,0 +1,4 @@ +black>=24,<25 +isort[colors] +pylint +cython-lint \ No newline at end of file diff --git a/requirements/doc.txt b/requirements/doc.txt new file mode 100644 index 0000000..c49be85 --- /dev/null 
+++ b/requirements/doc.txt @@ -0,0 +1,8 @@ +m2r2>=0.2.8 +scipy>=1.1.0 +matplotlib>=3 +perfplot<0.9 +numpydoc>=1.1 +sphinx>=7 +sphinx-gallery>=0.8 +sphinx-rtd-theme>=2 \ No newline at end of file diff --git a/requirements/scipy.txt b/requirements/scipy.txt new file mode 100644 index 0000000..9c61c73 --- /dev/null +++ b/requirements/scipy.txt @@ -0,0 +1 @@ +scipy \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..2f8c0c7 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,4 @@ +pytest>=8 +pytest-cov>=3 +pytest-xdist>=3 +scipy>=1.1.0 \ No newline at end of file diff --git a/requirements/umfpack b/requirements/umfpack new file mode 100644 index 0000000..a8630c1 --- /dev/null +++ b/requirements/umfpack @@ -0,0 +1 @@ +scikit-umfpack \ No newline at end of file From 4c1e3a0827ec42a78a31be65d4aba32077c3d7b8 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:32:11 +0200 Subject: [PATCH 31/62] feat: [11] updated changelog --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1417c4c..b197caf 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,39 @@ All notable changes to **pentapy** will be documented in this file. 
+## [1.4.0] - 2024-06 + +See [#22](https://github.com/GeoStat-Framework/pentapy/pull/22) + +### Enhancements + +- added support for multiple right-hand sides (currently serial) +- improved error handling and added debug information to error messages + +### Changes + +- shotgun refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support +- fully typed the function ``pentapy.solve`` +- made internal solver alias handling of ``pentapy.solve`` smarter, more robust, and removed all duplicate code +- gave all solvers a consistent interface +- made code in ``pentapy.core`` more human-readable and maintainable and added comments +- fixed typos in documentation + +### Bugfixes + +- fixed error handling in case of zero-division to trigger dead error handling branch (see [Issue 23](https://github.com/GeoStat-Framework/pentapy/issues/23)) +- fixed edge case error for row/column of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24)) + +### Tests + +- transitioned from ``unittest``-based testing to fully ``pytest``-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) +- made actual tests more meaningful by comparing them to LAPACK as reference standard (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) +- included external solver bindings accessible via ``pentapy.solve`` as part of the test suite +- increased true coverage (not line-hit coverage) close to 100% + +### Packaging + +- made dependency specification file-based and dynamic ## [1.3.0] - 2024-04 @@ -100,6 +133,7 @@ This is the first release of pentapy, a python toolbox for solving pentadiagonal The solver is implemented in cython, which makes it really fast. 
+[1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0 [1.2.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.2...v1.2.0 [1.1.2]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.1...v1.1.2 From b4f79e5fd3b73e666c738d3756e223ef952930a0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 21:53:28 +0200 Subject: [PATCH 32/62] fix: [11] fixed wrong coverage exclude --- src/pentapy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index c701573..456155e 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -153,7 +153,7 @@ def solve( return np.full(shape=rhs_og_shape, fill_value=np.nan) # Case 2: LAPACK's banded solver - elif solver_inter == pmodels.PentaSolverAliases.LAPACK: # pragma: no cover + elif solver_inter == pmodels.PentaSolverAliases.LAPACK: try: from scipy.linalg import solve_banded except ImportError as imp_err: # pragma: no cover From a2edcb4d7d458e6ffd2cc095d42312b4095032fc Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 22:34:59 +0200 Subject: [PATCH 33/62] doc: [11] improved wording for preserving shape --- src/pentapy/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 456155e..56e2a54 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -75,7 +75,8 @@ def solve( mat : :class:`numpy.ndarray` of shape (m, m) or (5, m) The full or flattened version of the pentadiagonal matrix. rhs : :class:`numpy.ndarray` of shape (m,) or (m, n) - The right hand side(s) of the equation system. Its shape is preserved. + The right hand side(s) of the equation system. Its shape determines the shape + of the output as they will be identical. is_flat : :class:`bool`, default=False State if the matrix is already flattened. 
Default: ``False`` index_row_wise : :class:`bool`, default=True From 007bc5a0f794bcabe6edab86b2f10038ce84a4d9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 8 Jun 2024 22:36:04 +0200 Subject: [PATCH 34/62] fix: [11] fixed changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b197caf..d13b435 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to **pentapy** will be documented in this file. ## [1.4.0] - 2024-06 -See [#22](https://github.com/GeoStat-Framework/pentapy/pull/22) +See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26) ### Enhancements From 997d372162352ccdd8e61e54101a1acc4e6f10ad Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 16:27:14 +0200 Subject: [PATCH 35/62] feat: [11] enable multi-threaded parallelism for multiple right-hand sides --- requirements/base.txt | 3 +- src/pentapy/core.py | 38 +++- src/pentapy/solver.pxd | 12 +- src/pentapy/solver.pyx | 489 ++++++++++++++++++++++++----------------- 4 files changed, 333 insertions(+), 209 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 19b3787..0e77631 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1 +1,2 @@ -numpy>=1.20.0 \ No newline at end of file +numpy>=1.20.0 +psutil>=5.8.0 \ No newline at end of file diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 56e2a54..87761a4 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -8,6 +8,7 @@ from typing import Literal import numpy as np +import psutil from pentapy import _models as pmodels from pentapy import solver as psolver # type: ignore @@ -42,6 +43,7 @@ def solve( "umf", "umf_pack", ] = 1, + workers: int = 1, ) -> np.ndarray: """ Solver for a pentadiagonal system. @@ -91,6 +93,11 @@ def solve( * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` Strings are not case-sensitive. 
+ workers : :class:`int`, default=1 + Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel + processing of multiple right-hand sides. Parallelisation overhead can be + significant for small systems. If set to ``-1``, the number of workers is + automatically determined. Default: ``1`` Returns ------- @@ -107,16 +114,18 @@ def solve( pmodels.PentaSolverAliases.PTRANS_I, pmodels.PentaSolverAliases.PTRANS_II, }: + # the matrix is checked and shifted if necessary ... if is_flat and index_row_wise: mat_flat = np.asarray(mat, dtype=np.double) ptools._check_penta(mat_flat) elif is_flat: - mat_flat = np.array(mat, dtype=np.double) + mat_flat = np.asarray(mat, dtype=np.double) ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, copy=False) else: mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) + # ... followed by the conversion of the right-hand side rhs = np.asarray(rhs, dtype=np.double) # Special case: Early exit when the matrix has only 3 rows/columns @@ -128,6 +137,23 @@ def solve( b=rhs, ) + # now, the number of workers for multithreading has to be determined if + # necessary + # NOTE: the following does not count the number of total threads, but the number + # of threads available for the solver + if workers < -1: + raise ValueError( + f"pentapy.solve: workers has to be -1 or greater, not {workers=}" + ) + + if workers == -1: + proc = psutil.Process() + workers = len(proc.cpu_affinity()) # type: ignore + del proc + + elif workers == 0: + workers = 1 + # if there is only a single right-hand side, it has to be reshaped to a 2D array # NOTE: this has to be reverted at the end single_rhs = rhs.ndim == 1 @@ -143,7 +169,11 @@ def solve( ) # if there was only a 1D right-hand side, the result has to be flattened - sol = solver_func(mat_flat, rhs) + sol = solver_func( + np.ascontiguousarray(mat_flat), + np.ascontiguousarray(rhs), + workers, + ) if single_rhs: sol = sol.ravel() @@ -162,7 +192,7 @@ def solve( raise ValueError(msg) 
from imp_err if is_flat and index_row_wise: - mat_flat = np.array(mat) + mat_flat = np.asarray(mat) ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: @@ -197,7 +227,7 @@ def solve( raise ValueError(msg) from imp_err if is_flat and index_row_wise: - mat_flat = np.array(mat) + mat_flat = np.asarray(mat) ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index b16f8a0..4fe6c1e 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -1,4 +1,12 @@ # cython: language_level=3 -cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs) +cdef double[::, ::1] c_penta_solver1( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +) -cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs) +cdef double[::, ::1] c_penta_solver2( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 2da01db..fc2a9a5 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -11,24 +11,50 @@ implemented in Cython. 
import numpy as np cimport numpy as np +from cython.parallel import prange from libc.stdint cimport int64_t +cdef enum: MAT_FACT_N_COLS = 5 + # === Main Python Interface === -def penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs): - return np.asarray(c_penta_solver1(mat_flat, rhs)) +def penta_solver1( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +): + return np.asarray( + c_penta_solver1( + mat_flat, + rhs, + workers, + ) + ) -def penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs): - return np.asarray(c_penta_solver2(mat_flat, rhs)) +def penta_solver2( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +): + return np.asarray( + c_penta_solver2( + mat_flat, + rhs, + workers, + ) + ) # === Solver Algorithm 1 === - -cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs): +cdef double[::, ::1] c_penta_solver1( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +): """ Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and the right-hand side ``b`` by @@ -42,37 +68,43 @@ cdef double[::, ::] c_penta_solver1(double[::, ::] mat_flat, double[::, ::] rhs) # === Variable declarations === - cdef int64_t mat_n_rows = mat_flat.shape[1] + cdef int64_t mat_n_cols = mat_flat.shape[1] cdef int64_t rhs_n_cols = rhs.shape[1] cdef int64_t iter_col - cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols)) - cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5)) + + cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols)) + cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, MAT_FACT_N_COLS)) # === Solving the system of equations === # first, the matrix is factorized c_penta_factorize_algo1( - mat_flat, - mat_n_rows, - mat_factorized, + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], ) # then, all the right-hand sides are solved - for iter_col in range(rhs_n_cols): + for iter_col in prange( + rhs_n_cols, 
+ nogil=True, + num_threads=workers, + ): c_solve_penta_from_factorize_algo_1( - mat_n_rows, - mat_factorized, - rhs[::, iter_col], - result[::, iter_col], + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, iter_col], + rhs_n_cols, + &result[0, iter_col], ) return result cdef void c_penta_factorize_algo1( - double[::, ::] mat_flat, - int64_t mat_n_rows, - double[::, ::1] mat_factorized, + double* mat_flat, + int64_t mat_n_cols, + double* mat_factorized, ): """ Factorizes the pentadiagonal matrix ``A`` into @@ -103,7 +135,11 @@ cdef void c_penta_factorize_algo1( # === Variable declarations === - cdef int64_t iter_row + cdef int64_t iter_row, fact_curr_base_idx + cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row + cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols # base index for the third row + cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols # base index for the fourth row + cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols # base index for the fifth row cdef double mu_i, ga_i, e_i # mu, gamma, e cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha cdef double be_i, be_i_minus_1, be_i_plus_1 # beta @@ -111,80 +147,84 @@ cdef void c_penta_factorize_algo1( # === Factorization === # First row - mu_i = mat_flat[2, 0] - al_i_minus_1 = mat_flat[1, 0] / mu_i - be_i_minus_1 = mat_flat[0, 0] / mu_i + mu_i = mat_flat[mat_row_base_idx_2] + al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i + be_i_minus_1 = mat_flat[0] / mu_i - mat_factorized[0, 0] = 0.0 - mat_factorized[0, 1] = mu_i - mat_factorized[0, 2] = 0.0 - mat_factorized[0, 3] = al_i_minus_1 - mat_factorized[0, 4] = be_i_minus_1 + mat_factorized[0] = 0.0 + mat_factorized[1] = mu_i + mat_factorized[2] = 0.0 + mat_factorized[3] = al_i_minus_1 + mat_factorized[4] = be_i_minus_1 # Second row - ga_i = mat_flat[3, 1] - mu_i = mat_flat[2, 1] - al_i_minus_1 * ga_i - al_i = (mat_flat[1, 1] - be_i_minus_1 * ga_i) / mu_i - be_i = mat_flat[0, 1] / mu_i + ga_i = mat_flat[mat_row_base_idx_3 + 1] + mu_i = 
mat_flat[mat_row_base_idx_2 + 1] - al_i_minus_1 * ga_i + al_i = (mat_flat[mat_row_base_idx_1 + 1] - be_i_minus_1 * ga_i) / mu_i + be_i = mat_flat[1] / mu_i - mat_factorized[1, 0] = 0.0 - mat_factorized[1, 1] = mu_i - mat_factorized[1, 2] = ga_i - mat_factorized[1, 3] = al_i - mat_factorized[1, 4] = be_i + mat_factorized[5] = 0.0 + mat_factorized[6] = mu_i + mat_factorized[7] = ga_i + mat_factorized[8] = al_i + mat_factorized[9] = be_i # Central rows - for iter_row in range(2, mat_n_rows - 2): - e_i = mat_flat[4, iter_row] - ga_i = mat_flat[3, iter_row] - al_i_minus_1 * e_i - mu_i = mat_flat[2, iter_row] - be_i_minus_1 * e_i - al_i * ga_i + fact_curr_base_idx = 10 + for iter_row in range(2, mat_n_cols-2): + e_i = mat_flat[mat_row_base_idx_4 + iter_row] + ga_i = mat_flat[mat_row_base_idx_3 + iter_row] - al_i_minus_1 * e_i + mu_i = mat_flat[mat_row_base_idx_2 + iter_row] - be_i_minus_1 * e_i - al_i * ga_i - al_i_plus_1 = (mat_flat[1, iter_row] - be_i * ga_i) / mu_i + al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + iter_row] - be_i * ga_i) / mu_i al_i_minus_1 = al_i al_i = al_i_plus_1 - be_i_plus_1 = mat_flat[0, iter_row] / mu_i + be_i_plus_1 = mat_flat[iter_row] / mu_i be_i_minus_1 = be_i be_i = be_i_plus_1 - mat_factorized[iter_row, 0] = e_i - mat_factorized[iter_row, 1] = mu_i - mat_factorized[iter_row, 2] = ga_i - mat_factorized[iter_row, 3] = al_i - mat_factorized[iter_row, 4] = be_i + mat_factorized[fact_curr_base_idx] = e_i + mat_factorized[fact_curr_base_idx + 1] = mu_i + mat_factorized[fact_curr_base_idx + 2] = ga_i + mat_factorized[fact_curr_base_idx + 3] = al_i + mat_factorized[fact_curr_base_idx + 4] = be_i + + fact_curr_base_idx += MAT_FACT_N_COLS # Second to last row - e_i = mat_flat[4, mat_n_rows - 2] - ga_i = mat_flat[3, mat_n_rows - 2] - al_i_minus_1 * e_i - mu_i = mat_flat[2, mat_n_rows - 2] - be_i_minus_1 * e_i - al_i * ga_i - al_i_plus_1 = (mat_flat[1, mat_n_rows - 2] - be_i * ga_i) / mu_i + e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] + ga_i = 
mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - al_i_minus_1 * e_i + mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - be_i_minus_1 * e_i - al_i * ga_i + al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i - mat_factorized[mat_n_rows - 2, 0] = e_i - mat_factorized[mat_n_rows - 2, 1] = mu_i - mat_factorized[mat_n_rows - 2, 2] = ga_i - mat_factorized[mat_n_rows - 2, 3] = al_i_plus_1 - mat_factorized[mat_n_rows - 2, 4] = 0.0 + mat_factorized[fact_curr_base_idx] = e_i + mat_factorized[fact_curr_base_idx + 1] = mu_i + mat_factorized[fact_curr_base_idx + 2] = ga_i + mat_factorized[fact_curr_base_idx + 3] = al_i_plus_1 + mat_factorized[fact_curr_base_idx + 4] = 0.0 # Last Row - e_i = mat_flat[4, mat_n_rows - 1] - ga_i = mat_flat[3, mat_n_rows - 1] - al_i * e_i - mu_i = mat_flat[2, mat_n_rows - 1] - be_i * e_i - al_i_plus_1 * ga_i + e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] + ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] - al_i * e_i + mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] - be_i * e_i - al_i_plus_1 * ga_i - mat_factorized[mat_n_rows - 1, 0] = e_i - mat_factorized[mat_n_rows - 1, 1] = mu_i - mat_factorized[mat_n_rows - 1, 2] = ga_i - mat_factorized[mat_n_rows - 1, 3] = 0.0 - mat_factorized[mat_n_rows - 1, 4] = 0.0 + mat_factorized[fact_curr_base_idx + 5] = e_i + mat_factorized[fact_curr_base_idx + 6] = mu_i + mat_factorized[fact_curr_base_idx + 7] = ga_i + mat_factorized[fact_curr_base_idx + 8] = 0.0 + mat_factorized[fact_curr_base_idx + 9] = 0.0 return -cdef void c_solve_penta_from_factorize_algo_1( - int64_t mat_n_rows, - double[::, ::1] mat_factorized, - double[::] rhs_single, - double[::] result_view, -): +cdef int c_solve_penta_from_factorize_algo_1( + int64_t mat_n_cols, + double* mat_factorized, + double* rhs_single, + int64_t rhs_n_cols, + double* result_view, +) except * nogil: """ Solves the pentadiagonal system of equations ``Ax = b`` with the factorized unit upper triangular matrix ``U`` and 
the right-hand side ``b``. @@ -195,72 +235,84 @@ cdef void c_solve_penta_from_factorize_algo_1( # === Variable declarations === - cdef int64_t iter_row - cdef double ze_i, ze_i_minus_1, ze_i_plus_1 + cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx + cdef double ze_i, ze_i_minus_1, ze_i_plus_1 # zeta # === Transformation === # first, the right-hand side is transformed into the vector ``zeta`` # First row - ze_i_minus_1 = rhs_single[0] / mat_factorized[0, 1] + ze_i_minus_1 = rhs_single[0] / mat_factorized[1] result_view[0] = ze_i_minus_1 # Second row - ze_i = (rhs_single[1] - ze_i_minus_1 * mat_factorized[1, 2]) / mat_factorized[1, 1] - result_view[1] = ze_i + ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[7]) / mat_factorized[6] + result_view[rhs_n_cols] = ze_i # Central rows - for iter_row in range(2, mat_n_rows - 2): + fact_curr_base_idx = 10 + res_curr_base_idx = rhs_n_cols + rhs_n_cols + + for iter_row in range(2, mat_n_cols-2): ze_i_plus_1 = ( - rhs_single[iter_row] - - ze_i_minus_1 * mat_factorized[iter_row, 0] - - ze_i * mat_factorized[iter_row, 2] - ) / mat_factorized[iter_row, 1] + rhs_single[res_curr_base_idx] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] + - ze_i * mat_factorized[fact_curr_base_idx + 2] + ) / mat_factorized[fact_curr_base_idx + 1] ze_i_minus_1 = ze_i ze_i = ze_i_plus_1 - result_view[iter_row] = ze_i_plus_1 + result_view[res_curr_base_idx] = ze_i_plus_1 + + fact_curr_base_idx += MAT_FACT_N_COLS + res_curr_base_idx += rhs_n_cols # Second to last row ze_i_plus_1 = ( - rhs_single[mat_n_rows - 2] - - ze_i_minus_1 * mat_factorized[mat_n_rows - 2, 0] - - ze_i * mat_factorized[mat_n_rows - 2, 2] - ) / mat_factorized[mat_n_rows - 2, 1] + rhs_single[res_curr_base_idx] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] + - ze_i * mat_factorized[fact_curr_base_idx + 2] + ) / mat_factorized[fact_curr_base_idx + 1] ze_i_minus_1 = ze_i ze_i = ze_i_plus_1 - result_view[mat_n_rows - 2] = ze_i_plus_1 + 
result_view[res_curr_base_idx] = ze_i_plus_1 # Last row ze_i_plus_1 = ( - rhs_single[mat_n_rows - 1] - - ze_i_minus_1 * mat_factorized[mat_n_rows - 1, 0] - - ze_i * mat_factorized[mat_n_rows - 1, 2] - ) / mat_factorized[mat_n_rows - 1, 1] - result_view[mat_n_rows - 1] = ze_i_plus_1 + rhs_single[res_curr_base_idx + rhs_n_cols] + - ze_i_minus_1 * mat_factorized[fact_curr_base_idx + 5] + - ze_i * mat_factorized[fact_curr_base_idx + 7] + ) / mat_factorized[fact_curr_base_idx + 6] + result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1 # === Backward substitution === # The solution vector is calculated by backward substitution that overwrites the # right-hand side vector with the solution vector - ze_i -= mat_factorized[mat_n_rows - 2, 3] * ze_i_plus_1 - result_view[mat_n_rows - 2] = ze_i + ze_i -= mat_factorized[fact_curr_base_idx + 3] * ze_i_plus_1 + result_view[res_curr_base_idx] = ze_i + + for iter_row in range(mat_n_cols-3, -1, -1): + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols - for iter_row in range(mat_n_rows - 3, -1, -1): - result_view[iter_row] -= ( - mat_factorized[iter_row, 3] * ze_i - + mat_factorized[iter_row, 4] * ze_i_plus_1 + result_view[res_curr_base_idx] -= ( + mat_factorized[fact_curr_base_idx + 3] * ze_i + + mat_factorized[fact_curr_base_idx + 4] * ze_i_plus_1 ) ze_i_plus_1 = ze_i - ze_i = result_view[iter_row] - - return + ze_i = result_view[res_curr_base_idx] + return 0 # === Solver Algorithm 2 === -cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs): +cdef double[::, ::1] c_penta_solver2( + double[::, ::1] mat_flat, + double[::, ::1] rhs, + int workers, +): """ Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and the right-hand side ``b`` by @@ -272,36 +324,44 @@ cdef double[::, ::] c_penta_solver2(double[::, ::] mat_flat, double[::, ::] rhs) """ - # Variable declarations + # === Variable declarations === - cdef int64_t mat_n_rows = mat_flat.shape[1] + cdef 
int64_t mat_n_cols = mat_flat.shape[1] cdef int64_t rhs_n_cols = rhs.shape[1] cdef int64_t iter_col - cdef double[::, ::1] result = np.empty(shape=(mat_n_rows, rhs_n_cols)) - cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_rows, 5)) + + cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols)) + cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, 5)) + + # === Solving the system of equations === # first, the matrix is factorized c_penta_factorize_algo2( - mat_flat, - mat_n_rows, - mat_factorized, + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], ) # then, all the right-hand sides are solved - for iter_col in range(rhs_n_cols): + for iter_col in prange( + rhs_n_cols, + nogil=True, + num_threads=workers, + ): c_solve_penta_from_factorize_algo_2( - mat_n_rows, - mat_factorized, - rhs[::, iter_col], - result[::, iter_col], + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, iter_col], + rhs_n_cols, + &result[0, iter_col], ) return result cdef void c_penta_factorize_algo2( - double[::, ::] mat_flat, - int64_t mat_n_rows, - double[::, ::1] mat_factorized, + double* mat_flat, + int64_t mat_n_cols, + double* mat_factorized, ): """ Factorizes the pentadiagonal matrix ``A`` into @@ -333,7 +393,11 @@ cdef void c_penta_factorize_algo2( # === Variable declarations === - cdef int64_t iter_row + cdef int64_t iter_row, fact_curr_base_idx + cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row + cdef int64_t mat_row_base_idx_2 = 2 * mat_n_cols # base index for the third row + cdef int64_t mat_row_base_idx_3 = 3 * mat_n_cols # base index for the fourth row + cdef int64_t mat_row_base_idx_4 = 4 * mat_n_cols # base index for the fifth row cdef double ps_i, rho_i # psi, rho cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi @@ -341,80 +405,86 @@ cdef void c_penta_factorize_algo2( # === Factorization === # First row - ps_i = mat_flat[2, mat_n_rows - 1] - si_i_plus_1 = 
mat_flat[3, mat_n_rows - 1] / ps_i - phi_i_plus_1 = mat_flat[4, mat_n_rows - 1] / ps_i - mat_factorized[mat_n_rows - 1, 0] = phi_i_plus_1 - mat_factorized[mat_n_rows - 1, 1] = si_i_plus_1 - mat_factorized[mat_n_rows - 1, 2] = ps_i - mat_factorized[mat_n_rows - 1, 3] = 0.0 - mat_factorized[mat_n_rows - 1, 4] = 0.0 + ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] + si_i_plus_1 = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] / ps_i + phi_i_plus_1 = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] / ps_i - # Second row - rho_i = mat_flat[1, mat_n_rows-2] - ps_i = mat_flat[2, mat_n_rows-2] - si_i_plus_1 * rho_i - si_i = (mat_flat[3, mat_n_rows-2] - phi_i_plus_1 * rho_i) / ps_i - phi_i = mat_flat[4, mat_n_rows-2] / ps_i + fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = 0.0 + mat_factorized[fact_curr_base_idx + 3] = 0.0 + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i_plus_1 + mat_factorized[fact_curr_base_idx] = phi_i_plus_1 - mat_factorized[mat_n_rows - 2, 0] = phi_i - mat_factorized[mat_n_rows - 2, 1] = si_i - mat_factorized[mat_n_rows - 2, 2] = ps_i - mat_factorized[mat_n_rows - 2, 3] = rho_i - mat_factorized[mat_n_rows - 2, 4] = 0.0 + # Second row + rho_i = mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] + ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - si_i_plus_1 * rho_i + si_i = (mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - phi_i_plus_1 * rho_i) / ps_i + phi_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] / ps_i + + fact_curr_base_idx -= MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = 0.0 + mat_factorized[fact_curr_base_idx + 3] = rho_i + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i + mat_factorized[fact_curr_base_idx] = phi_i # Central rows - for iter_row in range(mat_n_rows-3, 1, -1): - b_i = mat_flat[0, iter_row] - rho_i = mat_flat[1, iter_row] - si_i_plus_1 * b_i - ps_i = mat_flat[2, 
iter_row] - phi_i_plus_1 * b_i - si_i * rho_i - si_i_minus_1 = (mat_flat[3, iter_row] - phi_i * rho_i) / ps_i + for iter_row in range(mat_n_cols - 3, 1, -1): + b_i = mat_flat[iter_row] + rho_i = mat_flat[mat_row_base_idx_1 + iter_row] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + iter_row] - phi_i_plus_1 * b_i - si_i * rho_i + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + iter_row] - phi_i * rho_i) / ps_i si_i_plus_1 = si_i si_i = si_i_minus_1 - phi_i_minus_1 = mat_flat[4, iter_row] / ps_i + phi_i_minus_1 = mat_flat[mat_row_base_idx_4 + iter_row] / ps_i phi_i_plus_1 = phi_i phi_i = phi_i_minus_1 - mat_factorized[iter_row, 0] = phi_i - mat_factorized[iter_row, 1] = si_i - mat_factorized[iter_row, 2] = ps_i - mat_factorized[iter_row, 3] = rho_i - mat_factorized[iter_row, 4] = b_i + fact_curr_base_idx -= MAT_FACT_N_COLS + mat_factorized[fact_curr_base_idx + 4] = b_i + mat_factorized[fact_curr_base_idx + 3] = rho_i + mat_factorized[fact_curr_base_idx + 2] = ps_i + mat_factorized[fact_curr_base_idx + 1] = si_i + mat_factorized[fact_curr_base_idx] = phi_i # Second to last row - b_i = mat_flat[0, 1] - rho_i = mat_flat[1, 1] - si_i_plus_1 * b_i - ps_i = mat_flat[2, 1] - phi_i_plus_1 * b_i - si_i * rho_i - si_i_minus_1 = (mat_flat[3, 1] - phi_i * rho_i) / ps_i + b_i = mat_flat[1] + rho_i = mat_flat[mat_row_base_idx_1 + 1] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + 1] - phi_i_plus_1 * b_i - si_i * rho_i + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + 1] - phi_i * rho_i) / ps_i si_i_plus_1 = si_i si_i = si_i_minus_1 - mat_factorized[1, 0] = 0.0 - mat_factorized[1, 1] = si_i - mat_factorized[1, 2] = ps_i - mat_factorized[1, 3] = rho_i - mat_factorized[1, 4] = b_i + mat_factorized[9] = b_i + mat_factorized[8] = rho_i + mat_factorized[7] = ps_i + mat_factorized[6] = si_i + mat_factorized[5] = 0.0 # Last row - b_i = mat_flat[0, 0] - rho_i = mat_flat[1, 0] - si_i_plus_1 * b_i - ps_i = mat_flat[2, 0] - phi_i * b_i - si_i * rho_i + b_i = mat_flat[0] + 
rho_i = mat_flat[mat_row_base_idx_1 + 0] - si_i_plus_1 * b_i + ps_i = mat_flat[mat_row_base_idx_2 + 0] - phi_i * b_i - si_i * rho_i - mat_factorized[0, 0] = 0.0 - mat_factorized[0, 1] = 0.0 - mat_factorized[0, 2] = ps_i - mat_factorized[0, 3] = rho_i - mat_factorized[0, 4] = b_i + mat_factorized[4] = b_i + mat_factorized[3] = rho_i + mat_factorized[2] = ps_i + mat_factorized[1] = 0.0 + mat_factorized[0] = 0.0 return -cdef void c_solve_penta_from_factorize_algo_2( - int64_t mat_n_rows, - double[::, ::1] mat_factorized, - double[::] rhs_single, - double[::] result_view, -): +cdef int c_solve_penta_from_factorize_algo_2( + int64_t mat_n_cols, + double* mat_factorized, + double* rhs_single, + int64_t rhs_n_cols, + double* result_view, +) except * nogil: + """ Solves the pentadiagonal system of equations ``Ax = b`` with the factorized unit lower triangular matrix ``L`` and the right-hand side ``b``. @@ -425,66 +495,81 @@ cdef void c_solve_penta_from_factorize_algo_2( # === Variable declarations === - cdef int64_t iter_row + cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx cdef double om_i, om_i_minus_1, om_i_plus_1 # omega # === Transformation === # first, the right-hand side is transformed into the vector ``omega`` # First row - om_i_plus_1 = rhs_single[mat_n_rows-1] / mat_factorized[mat_n_rows - 1, 2] - result_view[mat_n_rows-1] = om_i_plus_1 + fact_curr_base_idx = (mat_n_cols - 1) * MAT_FACT_N_COLS + res_curr_base_idx = (mat_n_cols - 1) * rhs_n_cols + + om_i_plus_1 = rhs_single[res_curr_base_idx] / mat_factorized[fact_curr_base_idx + 2] + result_view[res_curr_base_idx] = om_i_plus_1 # Second row + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + om_i = ( - rhs_single[mat_n_rows-2] - - om_i_plus_1 * mat_factorized[mat_n_rows - 2, 3] - ) / mat_factorized[mat_n_rows - 2, 2] - result_view[mat_n_rows-2] = om_i + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 3] + ) / 
mat_factorized[fact_curr_base_idx + 2] + result_view[res_curr_base_idx] = om_i # Central rows - for iter_row in range(mat_n_rows-3, 1, -1): + for iter_row in range(mat_n_cols - 3, 1, -1): + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + om_i_minus_1 = ( - rhs_single[iter_row] - - om_i_plus_1 * mat_factorized[iter_row, 4] - - om_i * mat_factorized[iter_row, 3] - ) / mat_factorized[iter_row, 2] + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4] + - om_i * mat_factorized[fact_curr_base_idx + 3] + ) / mat_factorized[fact_curr_base_idx + 2] om_i_plus_1 = om_i om_i = om_i_minus_1 - result_view[iter_row] = om_i + result_view[res_curr_base_idx] = om_i # Second to last row + fact_curr_base_idx -= MAT_FACT_N_COLS + res_curr_base_idx -= rhs_n_cols + om_i_minus_1 = ( - rhs_single[1] - - om_i_plus_1 * mat_factorized[1, 4] - - om_i * mat_factorized[1, 3] - ) / mat_factorized[1, 2] + rhs_single[res_curr_base_idx] + - om_i_plus_1 * mat_factorized[fact_curr_base_idx + 4] + - om_i * mat_factorized[fact_curr_base_idx + 3] + ) / mat_factorized[fact_curr_base_idx + 2] om_i_plus_1 = om_i om_i = om_i_minus_1 - result_view[1] = om_i + result_view[res_curr_base_idx] = om_i # Last row om_i_minus_1 = ( rhs_single[0] - - om_i_plus_1 * mat_factorized[0, 4] - - om_i * mat_factorized[0, 3] - ) / mat_factorized[0, 2] + - om_i_plus_1 * mat_factorized[4] + - om_i * mat_factorized[3] + ) / mat_factorized[2] result_view[0] = om_i_minus_1 # === Forward substitution === # The solution vector is calculated by forward substitution that overwrites the # right-hand side vector with the solution vector - om_i -= mat_factorized[1, 1] * om_i_minus_1 - result_view[1] = om_i - - for iter_row in range(2, mat_n_rows): - result_view[iter_row] = ( - result_view[iter_row] - - mat_factorized[iter_row, 0] * om_i_minus_1 - - mat_factorized[iter_row, 1] * om_i + om_i -= mat_factorized[fact_curr_base_idx + 1] * om_i_minus_1 + result_view[res_curr_base_idx] 
= om_i + + for iter_row in range(2, mat_n_cols): + fact_curr_base_idx += MAT_FACT_N_COLS + res_curr_base_idx += rhs_n_cols + + result_view[res_curr_base_idx] = ( + result_view[res_curr_base_idx] + - mat_factorized[fact_curr_base_idx] * om_i_minus_1 + - mat_factorized[fact_curr_base_idx + 1] * om_i ) om_i_minus_1 = om_i - om_i = result_view[iter_row] + om_i = result_view[res_curr_base_idx] - return + return 0 From 0cf9890e507a3ac887b6ed8a68ab737c9d1ed1b9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 16:30:22 +0200 Subject: [PATCH 36/62] tests: [11] renamed tests for serial mode --- ...st_solvers_internal.py => test_solvers_internal_serial.py} | 4 ++++ 1 file changed, 4 insertions(+) rename tests/{test_solvers_internal.py => test_solvers_internal_serial.py} (98%) diff --git a/tests/test_solvers_internal.py b/tests/test_solvers_internal_serial.py similarity index 98% rename from tests/test_solvers_internal.py rename to tests/test_solvers_internal_serial.py index cdc55fa..8b75964 100644 --- a/tests/test_solvers_internal.py +++ b/tests/test_solvers_internal_serial.py @@ -2,6 +2,8 @@ Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or PTRANS-II. +It tests them in SERIAL mode only. 
+ """ # === Imports === @@ -132,6 +134,7 @@ def test_pentapy_solvers( mat=mat, rhs=rhs, solver=solver_alias, # type: ignore + workers=1, **kwargs, ) assert sol.shape == result_shape @@ -144,6 +147,7 @@ def test_pentapy_solvers( mat=mat, rhs=rhs, solver=solver_alias, # type: ignore + workers=1, **kwargs, ) assert sol.shape == result_shape From 9040bb7a4ff5e5f26f55e394f316cfa001c3a751 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 18:28:33 +0200 Subject: [PATCH 37/62] test: [11] reduced load on tests; transitioned to template-based approach; added tests for parallel solvers --- tests/templates.py | 176 ++++++++++++++++++++++++ tests/test_solvers_external.py | 4 +- tests/test_solvers_internal_parallel.py | 59 ++++++++ tests/test_solvers_internal_serial.py | 161 ++++------------------ 4 files changed, 264 insertions(+), 136 deletions(-) create mode 100644 tests/templates.py create mode 100644 tests/test_solvers_internal_parallel.py diff --git a/tests/templates.py b/tests/templates.py new file mode 100644 index 0000000..fe5b316 --- /dev/null +++ b/tests/templates.py @@ -0,0 +1,176 @@ +""" +This test suite implements reusable templates for testing the pentadiagonal solver based +on either Algorithm PTRANS-I or PTRANS-II. +""" + +# === Imports === + +from typing import Literal + +import numpy as np +import pytest +import util_funcs as uf + +import pentapy as pp + +# === Constants === + +SEED = 19_031_977 +REF_WARNING_CONTENT = "not suitable for input-matrix." +N_ROWS = [ + 3, # important edge case + 4, # important edge case + 5, # important edge case + 10, # even + 11, # odd + 50, # even + 51, # odd + 100, # ... 
+ 101, + 500, + 501, + 1_000, + 1_001, + 5_000, + 5_001, +] +SOLVER_ALIASES_PTRANS_I = [1, "1", "pTrAnS-I"] +SOLVER_ALIASES_PTRANS_II = [2, "2", "pTrAnS-Ii"] + +PARAM_DICT = { + "n_rows": N_ROWS, + "n_rhs": [None, 1, 10], + "input_layout": ["full", "banded_row_wise", "banded_col_wise"], + "solver_alias": SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II, + "induce_error": [False, True], + "from_order": ["C", "F"], + "workers": [1], +} + +# === Templates === + + +def pentapy_solvers_template( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + induce_error: bool, + from_order: Literal["C", "F"], + workers: int, +) -> None: + """ + Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from + different input layouts, number of right-hand sides, number of rows, and also + when inducing an error by making the first diagonal element zero. + It has to be ensured that the edge case of ``n_rows = 3`` is also covered. + + """ + + # first, a random pentadiagonal matrix is generated + mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # an error is induced by setting the first or last diagonal element to zero + if induce_error: + # the induction of the error is only possible if the matrix does not have + # only 3 rows + if n_rows == 3: + pytest.skip( + "Only 3 rows, cannot induce error because this will not go into " + "PTRANS-I, but NumPy." 
+ ) + + if solver_alias in SOLVER_ALIASES_PTRANS_I: + mat_full[0, 0] = 0.0 + else: + mat_full[n_rows - 1, n_rows - 1] = 0.0 + + # the right-hand side is generated + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows, n_rhs) + result_shape = (n_rows, n_rhs) + else: + rhs = np.random.rand(n_rows) + result_shape = (n_rows,) + + # the matrix is converted to the desired layout + if input_layout == "full": + mat = mat_full + kwargs = dict(is_flat=False) + + elif input_layout == "banded_row_wise": + mat = pp.create_banded(mat_full, col_wise=False) + kwargs = dict( + is_flat=True, + index_row_wise=True, + ) + + elif input_layout == "banded_col_wise": + mat = pp.create_banded(mat_full, col_wise=True) + kwargs = dict( + is_flat=True, + index_row_wise=False, + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + # the matrix is converted to the desired order + if from_order == "C": + mat = np.ascontiguousarray(mat) + rhs = np.ascontiguousarray(rhs) + elif from_order == "F": + mat = np.asfortranarray(mat) + rhs = np.asfortranarray(rhs) + else: + raise ValueError(f"Invalid from order: {from_order=}") + + # the solution is computed + # Case 1: in case of an error, a warning has to be issued and the result has to + # be NaN + if induce_error: + with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + workers=workers, + **kwargs, + ) + assert sol.shape == result_shape + assert np.isnan(sol).all() + + return + + # Case 2: in case of no error, the solution can be computed without any issues + sol = pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + workers=workers, + **kwargs, + ) + assert sol.shape == result_shape + + # if no error was induced, the reference solution is computed with SciPy + sol_ref = uf.solve_penta_matrix_dense_scipy( + mat=mat_full, + rhs=rhs, + ) + + # the solutions are compared + assert np.allclose(sol, sol_ref) 
diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py index 075dfb4..30c7b9a 100644 --- a/tests/test_solvers_external.py +++ b/tests/test_solvers_external.py @@ -29,8 +29,8 @@ 51, ] REF_WARNING_CONTENT = "singular" -SOLVER_ALIASES_LAPACK = [3, "3", "lapack", "LaPaCk"] -SOLVER_ALIASES_SPSOLVE = [4, "4", "spsolve", "SpSoLvE"] +SOLVER_ALIASES_LAPACK = [3, "3", "LaPaCk"] +SOLVER_ALIASES_SPSOLVE = [4, "4", "SpSoLvE"] # === Tests === diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py new file mode 100644 index 0000000..b8595b1 --- /dev/null +++ b/tests/test_solvers_internal_parallel.py @@ -0,0 +1,59 @@ +""" +Test suite for testing the pentadiagonal solver based on either Algorithm PTRANS-I or +PTRANS-II. + +It tests them in PARALLEL mode. + +""" + +# === Imports === + +from copy import deepcopy +from typing import Literal + +import pytest +import templates + +# === Tests === + +# the following series of decorators parametrize the tests for the pentadiagonal solver +# based on either Algorithm PTRANS-I or PTRANS-II in parallel mode +param_dict = deepcopy(templates.PARAM_DICT) +param_dict["from_order"] = ["C"] +param_dict["workers"] = [-1] + + +def test_pentapy_solvers_parallel( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + induce_error: bool, + from_order: Literal["C", "F"], + workers: int, +) -> None: + + templates.pentapy_solvers_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + induce_error=induce_error, + from_order=from_order, + workers=workers, + ) + + +for key, value in param_dict.items(): + test_pentapy_solvers_parallel = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_parallel + ) diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py 
index 8b75964..6bed962 100644 --- a/tests/test_solvers_internal_serial.py +++ b/tests/test_solvers_internal_serial.py @@ -10,153 +10,46 @@ from typing import Literal -import numpy as np -import pentapy as pp import pytest -import util_funcs as uf - -# === Constants === - -SEED = 19_031_977 -N_ROWS = [ - 3, - 4, - 5, - 10, - 11, - 25, - 26, - 50, - 51, - 100, - 101, - 250, - 251, - 500, - 501, - 1_000, - 1_001, - 2500, - 2501, - 5_000, - 5_001, - 10_000, - 10_001, -] -REF_WARNING_CONTENT = "not suitable for input-matrix." -SOLVER_ALIASES_PTRANS_I = [1, "1", "PTRANS-I", "ptrans-i"] -SOLVER_ALIASES_PTRANS_II = [2, "2", "PTRANS-II", "ptrans-ii"] +import templates # === Tests === +# the following series of decorators parametrize the tests for the pentadiagonal solver +# based on either Algorithm PTRANS-I or PTRANS-II in serial mode -@pytest.mark.parametrize("induce_error", [False, True]) -@pytest.mark.parametrize( - "solver_alias", SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II -) -@pytest.mark.parametrize("input_layout", ["full", "banded_row_wise", "banded_col_wise"]) -@pytest.mark.parametrize("n_rhs", [None, 1, 10]) -@pytest.mark.parametrize("n_rows", N_ROWS) -def test_pentapy_solvers( + +def test_pentapy_solvers_serial( n_rows: int, n_rhs: int, input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], - solver_alias: Literal[1, "1", "PTRANS-I"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], induce_error: bool, + from_order: Literal["C", "F"], + workers: int, ) -> None: - """ - Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from - different input layouts, number of right-hand sides, number of rows, and also - when inducing an error by making the first diagonal element zero. - It has to be ensured that the edge case of ``n_rows = 3`` is also covered. 
- - """ - # first, a random pentadiagonal matrix is generated - mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + templates.pentapy_solvers_template( n_rows=n_rows, - seed=SEED, - ill_conditioned=False, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + induce_error=induce_error, + from_order=from_order, + workers=workers, ) - # an error is induced by setting the first or last diagonal element to zero - if induce_error: - # the induction of the error is only possible if the matrix does not have - # only 3 rows - if n_rows == 3: - pytest.skip( - "Only 3 rows, cannot induce error because this will not go into " - "PTRANS-I, but NumPy." - ) - - if solver_alias in SOLVER_ALIASES_PTRANS_I: - mat_full[0, 0] = 0.0 - else: - mat_full[n_rows - 1, n_rows - 1] = 0.0 - - # the right-hand side is generated - np.random.seed(SEED) - if n_rhs is not None: - rhs = np.random.rand(n_rows, n_rhs) - result_shape = (n_rows, n_rhs) - else: - rhs = np.random.rand(n_rows) - result_shape = (n_rows,) - - # the matrix is converted to the desired layout - if input_layout == "full": - mat = mat_full - kwargs = dict(is_flat=False) - - elif input_layout == "banded_row_wise": - mat = pp.create_banded(mat_full, col_wise=False) - kwargs = dict( - is_flat=True, - index_row_wise=True, - ) - elif input_layout == "banded_col_wise": - mat = pp.create_banded(mat_full, col_wise=True) - kwargs = dict( - is_flat=True, - index_row_wise=False, - ) - - else: - raise ValueError(f"Invalid input layout: {input_layout}") - - # the solution is computed - # Case 1: in case of an error, a warning has to be issued and the result has to - # be NaN - if induce_error: - with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): - sol = pp.solve( - mat=mat, - rhs=rhs, - solver=solver_alias, # type: ignore - workers=1, - **kwargs, - ) - assert sol.shape == result_shape - assert np.isnan(sol).all() - - return - - # Case 2: in case of no error, the solution can be computed without any issues - 
sol = pp.solve( - mat=mat, - rhs=rhs, - solver=solver_alias, # type: ignore - workers=1, - **kwargs, +for key, value in templates.PARAM_DICT.items(): + test_pentapy_solvers_serial = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_serial ) - assert sol.shape == result_shape - - # if no error was induced, the reference solution is computed with SciPy - sol_ref = uf.solve_penta_matrix_dense_scipy( - mat=mat_full, - rhs=rhs, - ) - - # the solutions are compared - assert np.allclose(sol, sol_ref) From a0b33883290d1580ab12a8d5e019cb6753ab7242 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 18:31:31 +0200 Subject: [PATCH 38/62] doc/style: [11] fully typed `tools`; improved documentation --- src/pentapy/tools.py | 154 ++++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 47 deletions(-) diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index 4bb0b52..0e446b5 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -14,12 +14,21 @@ create_full """ -# pylint: disable=C0103 +# === Imports === + +from typing import Optional, Tuple, Type + import numpy as np +# === Functions === -def diag_indices(n, offset=0): + +def diag_indices( + n: int, + offset: int = 0, +) -> Tuple[np.ndarray, np.ndarray]: """ + Get indices for the main or minor diagonals of a matrix. This returns a tuple of indices that can be used to access the main @@ -28,17 +37,17 @@ def diag_indices(n, offset=0): Parameters ---------- - n : int + n : :class:`int` The size, along each dimension, of the arrays for which the returned indices can be used. - offset : int, optional - The diagonal offset. + offset : :class:`int`, default=0 + The diagonal offset. 
Default: 0 Returns ------- - idx : :class:`numpy.ndarray` + idx : :class:`numpy.ndarray` of shape (n - abs(offset),) row indices - idy : :class:`numpy.ndarray` + idy : :class:`numpy.ndarray` of shape (n - abs(offset),) col indices """ @@ -47,8 +56,15 @@ def diag_indices(n, offset=0): return idx, idy -def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): - """Shift rows of a banded matrix. +def shift_banded( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_to_row: bool = True, + copy: bool = True, +) -> np.ndarray: + """ + Shift rows of a banded matrix. Either from column-wise to row-wise storage or vice versa. @@ -83,27 +99,33 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (5, n) The Matrix or the flattened Version of the pentadiagonal matrix. - up : :class:`int` + up : :class:`int`, default=2 The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, default=2 The number of lower minor-diagonals. Default: 2 - col_to_row : :class:`bool`, optional + col_to_row : :class:`bool`, default=``True`` Shift from column-wise to row-wise storage or vice versa. Default: ``True`` - copy : :class:`bool`, optional + copy : :class:`bool`, default=``True`` Copy the input matrix or overwrite it. 
Default: ``True`` Returns ------- - :class:`numpy.ndarray` + :class:`numpy.ndarray` of shape (5, n) Shifted banded matrix + """ + + # first, the matrix is copied if required if copy: mat_flat = np.copy(mat) else: mat_flat = mat + + # then, the shifting is performed + # Case 1: Column-wise to row-wise if col_to_row: for i in range(up): mat_flat[i, : -(up - i)] = mat_flat[i, (up - i) :] @@ -111,18 +133,29 @@ def shift_banded(mat, up=2, low=2, col_to_row=True, copy=True): for i in range(low): mat_flat[-i - 1, (low - i) :] = mat_flat[-i - 1, : -(low - i)] mat_flat[-i - 1, : (low - i)] = 0 - else: - for i in range(up): - mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)] - mat_flat[i, : (up - i)] = 0 - for i in range(low): - mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :] - mat_flat[-i - 1, -(low - i) :] = 0 + + return mat_flat + + # Case 2: Row-wise to column-wise + for i in range(up): + mat_flat[i, (up - i) :] = mat_flat[i, : -(up - i)] + mat_flat[i, : (up - i)] = 0 + for i in range(low): + mat_flat[-i - 1, : -(low - i)] = mat_flat[-i - 1, (low - i) :] + mat_flat[-i - 1, -(low - i) :] = 0 + return mat_flat -def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): - """Create a banded matrix from a given quadratic Matrix. +def create_banded( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_wise: bool = True, + dtype: Optional[Type] = None, +) -> np.ndarray: + """ + Create a banded matrix from a given square Matrix. The Matrix will to be returned as a flattened matrix. Either in a column-wise flattened form:: @@ -155,21 +188,27 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (n, n) The full (n x n) Matrix. - up : :class:`int` + up : :class:`int`, default=2 The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, default=2 The number of lower minor-diagonals. 
Default: 2 - col_wise : :class:`bool`, optional + col_wise : :class:`bool`, default=``True`` Use column-wise storage. If False, use row-wise storage. Default: ``True`` + dtype : :class:`type` or ``None``, default=``None`` + The data type of the returned matrix. If ``None``, the data type of the + input matrix is preserved. Default: ``None`` Returns ------- - :class:`numpy.ndarray` + :class:`numpy.ndarray` of shape (5, n) Banded matrix + """ + + # first, the matrix is checked mat = np.asanyarray(mat) if mat.ndim != 2: msg = f"create_banded: matrix has to be 2D, got {mat.ndim}D" @@ -182,24 +221,36 @@ def create_banded(mat, up=2, low=2, col_wise=True, dtype=None): ) raise ValueError(msg) + # then, the matrix is created + dtype = mat.dtype if dtype is None else dtype size = mat.shape[0] - mat_flat = np.zeros((5, size), dtype=dtype) + mat_flat = np.zeros(shape=(5, size), dtype=dtype) mat_flat[up, :] = mat.diagonal() + # Case 1: Column-wise storage if col_wise: for i in range(up): mat_flat[i, (up - i) :] = mat.diagonal(up - i) for i in range(low): mat_flat[-i - 1, : -(low - i)] = mat.diagonal(-(low - i)) - else: - for i in range(up): - mat_flat[i, : -(up - i)] = mat.diagonal(up - i) - for i in range(low): - mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i)) + + return mat_flat + + # Case 2: Row-wise storage + for i in range(up): + mat_flat[i, : -(up - i)] = mat.diagonal(up - i) + for i in range(low): + mat_flat[-i - 1, (low - i) :] = mat.diagonal(-(low - i)) + return mat_flat -def create_full(mat, up=2, low=2, col_wise=True): +def create_full( + mat: np.ndarray, + up: int = 2, + low: int = 2, + col_wise: bool = True, +) -> np.ndarray: """Create a (n x n) Matrix from a given banded matrix. The given Matrix has to be a flattened matrix. @@ -233,21 +284,24 @@ def create_full(mat, up=2, low=2, col_wise=True): Parameters ---------- - mat : :class:`numpy.ndarray` + mat : :class:`numpy.ndarray` of shape (5, n) The flattened Matrix. 
- up : :class:`int` + up : :class:`int`, default=2 The number of upper minor-diagonals. Default: 2 - low : :class:`int` + low : :class:`int`, default=2 The number of lower minor-diagonals. Default: 2 - col_wise : :class:`bool`, optional + col_wise : :class:`bool`, default=``True`` Input is in column-wise storage. If False, use as row-wise storage. Default: ``True`` Returns ------- - :class:`numpy.ndarray` + :class:`numpy.ndarray` of shape (n, n) Full matrix. + """ + + # first, the matrix is checked mat = np.asanyarray(mat) if mat.ndim != 2: msg = f"create_full: matrix has to be 2D, got {mat.ndim}D" @@ -267,23 +321,29 @@ def create_full(mat, up=2, low=2, col_wise=True): ) raise ValueError(msg) + # then, the matrix is created size = mat.shape[1] mat_full = np.diag(mat[up]) + + # Case 1: Column-wise storage if col_wise: for i in range(up): mat_full[diag_indices(size, up - i)] = mat[i, (up - i) :] for i in range(low): mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, : -(low - i)] - else: - for i in range(up): - mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)] - for i in range(low): - mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :] + + return mat_full + + # Case 2: Row-wise storage + for i in range(up): + mat_full[diag_indices(size, up - i)] = mat[i, : -(up - i)] + for i in range(low): + mat_full[diag_indices(size, -(low - i))] = mat[-i - 1, (low - i) :] return mat_full -def _check_penta(mat): +def _check_penta(mat: np.ndarray) -> None: if mat.ndim != 2: msg = f"pentapy: matrix has to be 2D, got {mat.ndim}D" raise ValueError(msg) From b93102baba32f5d1d14282ac8f7dddf76cf3572d Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 18:32:03 +0200 Subject: [PATCH 39/62] package: [11] included build information for parallelized solvers --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index fc8648c..8d7421d 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,18 @@ """pentapy: A toolbox for 
pentadiagonal matrices.""" import os +import sys import Cython.Compiler.Options import numpy as np from Cython.Build import cythonize from setuptools import Extension, setup +if sys.platform.startswith("win"): + openmp_arg = "/openmp" +else: + openmp_arg = "-fopenmp" + Cython.Compiler.Options.annotate = True # cython extensions @@ -16,6 +22,8 @@ sources=[os.path.join("src", "pentapy", "solver.pyx")], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + extra_compile_args=[openmp_arg], + extra_link_args=[openmp_arg], ) ] From 3b015dc7615756b03796f791b79fbfefbe0f816b Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 18:55:37 +0200 Subject: [PATCH 40/62] tests: [11] finalised parallel solver tests --- tests/test_solvers_internal_parallel.py | 34 ++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py index b8595b1..d091d8d 100644 --- a/tests/test_solvers_internal_parallel.py +++ b/tests/test_solvers_internal_parallel.py @@ -9,7 +9,7 @@ # === Imports === from copy import deepcopy -from typing import Literal +from typing import Literal, Optional, Type import pytest import templates @@ -57,3 +57,35 @@ def test_pentapy_solvers_parallel( test_pentapy_solvers_parallel = pytest.mark.parametrize(key, value)( test_pentapy_solvers_parallel ) + + +@pytest.mark.parametrize( + "workers, expected", [(0, None), (1, None), (-1, None), (-2, ValueError)] +) +def test_pentapy_solvers_parallel_different_workers( + workers: int, expected: Optional[Type[Exception]] +) -> None: + """ + Tests the parallel solver with different number of workers, which might be wrong. 
+ + """ + + kwargs = dict( + n_rows=10, + n_rhs=1, + input_layout="full", + solver_alias=1, + induce_error=False, + from_order="C", + workers=workers, + ) + + # Case 1: the test should fail + if expected is not None: + with pytest.raises(expected): + templates.pentapy_solvers_template(**kwargs) # type: ignore + + return + + # Case 2: the test should pass + templates.pentapy_solvers_template(**kwargs) # type: ignore From 5e86a349e011bd19ce55443a0dd55bfd4b69a2b0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 19:15:21 +0200 Subject: [PATCH 41/62] tests/fix: [11] fixed inter-os-incompatibility of doctests --- tests/util_funcs.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index be9f3c1..f9d9074 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -196,6 +196,9 @@ def gen_conditioned_rand_penta_matrix_dense( Doctests -------- + >>> # Imports + >>> from math import log10 + >>> # 1) Generating a super small well-conditioned random pentadiagonal matrix >>> n_rows = 3 >>> seed = 19_031_977 @@ -215,8 +218,8 @@ def gen_conditioned_rand_penta_matrix_dense( >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good - >>> np.linalg.cond(mat) - 4.976880305142543 + >>> round(np.linalg.cond(mat), 2) + 4.98 >>> # 2) Generating a super small ill-conditioned random pentadiagonal matrix >>> mat = gen_conditioned_rand_penta_matrix_dense( @@ -235,8 +238,8 @@ def gen_conditioned_rand_penta_matrix_dense( (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 - >>> np.linalg.cond(mat) - 1.493156437173682e+17 + >>> round(log10(np.linalg.cond(mat)), 2) + 17.17 >>> # 3) Generating a small well-conditioned random pentadiagonal matrix >>> n_rows = 7 @@ -260,8 +263,8 @@ def gen_conditioned_rand_penta_matrix_dense( >>> spla.bandwidth(mat) (2, 2) 
>>> # its condition number is computed and values below 1e10 can be considered good - >>> np.linalg.cond(mat) - 42.4847446467131 + >>> round(np.linalg.cond(mat), 2) + 42.48 >>> # 4) Generating a small ill-conditioned random pentadiagonal matrix >>> mat = gen_conditioned_rand_penta_matrix_dense( @@ -284,8 +287,8 @@ def gen_conditioned_rand_penta_matrix_dense( (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 - >>> np.linalg.cond(mat) - 1.1079218802103074e+17 + >>> round(log10(np.linalg.cond(mat)), 2) + 17.04 >>> # 5) Generating a large well-conditioned random pentadiagonal matrix >>> n_rows = 1_000 @@ -301,8 +304,8 @@ def gen_conditioned_rand_penta_matrix_dense( >>> spla.bandwidth(mat) (2, 2) >>> # its condition number is computed and values below 1e10 can be considered good - >>> np.linalg.cond(mat) - 9570.995402466417 + >>> round(np.linalg.cond(mat), 2) + 9571.0 >>> # 6) Generating a large ill-conditioned random pentadiagonal matrix >>> mat = gen_conditioned_rand_penta_matrix_dense( @@ -317,8 +320,8 @@ def gen_conditioned_rand_penta_matrix_dense( (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 - >>> np.linalg.cond(mat) - 1.7137059583101745e+19 + >>> round(log10(np.linalg.cond(mat)), 2) + 19.23 """ From 712b0c78d049e3810f6424ed7f2884c739f0e577 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 19:22:53 +0200 Subject: [PATCH 42/62] test/fix: [11] ? really fixed the inter-os-problems ? 
--- tests/util_funcs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/util_funcs.py b/tests/util_funcs.py index f9d9074..0ea1582 100644 --- a/tests/util_funcs.py +++ b/tests/util_funcs.py @@ -320,8 +320,10 @@ def gen_conditioned_rand_penta_matrix_dense( (2, 2) >>> # its condition number is computed and its value should be close to the >>> # reciprocal floating point precision, i.e., ~1e16 - >>> round(log10(np.linalg.cond(mat)), 2) - 19.23 + >>> # NOTE: the next number will be so big that it will be different on each OS + >>> # so it will only be checked if it is greater than 1e16 + >>> round(log10(np.linalg.cond(mat)), 2) >= 16 + True """ From 6fd57d4d2840cd2f5b5642f9576d351c570b34cf Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 19:37:59 +0200 Subject: [PATCH 43/62] feat: [11] updated changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d13b435..5891ee6 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable changes to **pentapy** will be documented in this file. +## [2.0.0] - 2024-06 + +See [#27](https://github.com/GeoStat-Framework/pentapy/pull/27) + +### Breaking Changes + +- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support that can now be enabled via the new ``workers`` parameter in ``pentapy.solve`` (default: 1) +- fully typed the ``pentapy.tools`` module +- updated the **Cython low level interfaces** to PTRANS-I and PTRANS-II to **only accept C-contiguous arrays** (not backwards compatible) + ## [1.4.0] - 2024-06 See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26) @@ -133,6 +143,7 @@ This is the first release of pentapy, a python toolbox for solving pentadiagonal The solver is implemented in cython, which makes it really fast. 
+[2.0.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.4.0...v2.0.0 [1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0 [1.2.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.1.2...v1.2.0 From 88e30e6efd697af0ee4e8b93db22a909ed335f98 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 21:08:34 +0200 Subject: [PATCH 44/62] fix/doc: [11] reverted change that caused overwrite of `mat`; added a fucking comment; augmented tests to cover this error --- src/pentapy/core.py | 6 +++--- tests/templates.py | 4 ++++ tests/test_solvers_external.py | 7 ++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 87761a4..0794ba9 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -119,7 +119,7 @@ def solve( mat_flat = np.asarray(mat, dtype=np.double) ptools._check_penta(mat_flat) elif is_flat: - mat_flat = np.asarray(mat, dtype=np.double) + mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, copy=False) else: @@ -192,7 +192,7 @@ def solve( raise ValueError(msg) from imp_err if is_flat and index_row_wise: - mat_flat = np.asarray(mat) + mat_flat = np.array(mat) # NOTE: this is a copy ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: @@ -227,7 +227,7 @@ def solve( raise ValueError(msg) from imp_err if is_flat and index_row_wise: - mat_flat = np.asarray(mat) + mat_flat = np.array(mat) # NOTE: this is a copy ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: diff --git a/tests/templates.py b/tests/templates.py index fe5b316..d67d6cc 100644 --- a/tests/templates.py +++ b/tests/templates.py @@ -144,6 +144,7 @@ def pentapy_solvers_template( # be NaN if induce_error: with pytest.warns(UserWarning, 
match=REF_WARNING_CONTENT): + mat_ref_copy = mat.copy() sol = pp.solve( mat=mat, rhs=rhs, @@ -153,10 +154,12 @@ def pentapy_solvers_template( ) assert sol.shape == result_shape assert np.isnan(sol).all() + assert np.array_equal(mat, mat_ref_copy) return # Case 2: in case of no error, the solution can be computed without any issues + mat_ref_copy = mat.copy() sol = pp.solve( mat=mat, rhs=rhs, @@ -165,6 +168,7 @@ def pentapy_solvers_template( **kwargs, ) assert sol.shape == result_shape + assert np.array_equal(mat, mat_ref_copy) # if no error was induced, the reference solution is computed with SciPy sol_ref = uf.solve_penta_matrix_dense_scipy( diff --git a/tests/test_solvers_external.py b/tests/test_solvers_external.py index 30c7b9a..0f407aa 100644 --- a/tests/test_solvers_external.py +++ b/tests/test_solvers_external.py @@ -10,10 +10,11 @@ from typing import Literal import numpy as np -import pentapy as pp import pytest import util_funcs as uf +import pentapy as pp + # === Constants === SEED = 19_031_977 @@ -100,6 +101,7 @@ def test_external_solvers( # be NaN if induce_error: with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): + mat_ref_copy = mat.copy() sol = pp.solve( mat=mat, rhs=rhs, @@ -108,10 +110,12 @@ def test_external_solvers( ) assert sol.shape == result_shape assert np.isnan(sol).all() + assert np.array_equal(mat, mat_ref_copy) return # Case 2: in case of no error, the solution can be computed without any issues + mat_ref_copy = mat.copy() sol = pp.solve( mat=mat, rhs=rhs, @@ -119,3 +123,4 @@ def test_external_solvers( **kwargs, ) assert sol.shape == result_shape + assert np.array_equal(mat, mat_ref_copy) From b4ce4a2a5a2adf8e0f838c6ea52d3f2a5037f926 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 21:29:09 +0200 Subject: [PATCH 45/62] docs: [11] removed doubled defaults from docstrings --- src/pentapy/core.py | 10 +++++----- src/pentapy/tools.py | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff 
--git a/src/pentapy/core.py b/src/pentapy/core.py index 0794ba9..af338c8 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -79,11 +79,11 @@ def solve( rhs : :class:`numpy.ndarray` of shape (m,) or (m, n) The right hand side(s) of the equation system. Its shape determines the shape of the output as they will be identical. - is_flat : :class:`bool`, default=False + is_flat : :class:`bool`, optional State if the matrix is already flattened. Default: ``False`` - index_row_wise : :class:`bool`, default=True + index_row_wise : :class:`bool`, optional State if the flattened matrix is row-wise flattened. Default: ``True`` - solver : :class:`int` or :class:`str`, default=1 + solver : :class:`int` or :class:`str`, optional Which solver should be used. The following are provided: * ``[1, "1", "PTRANS-I"]`` : The PTRANS-I algorithm (default) @@ -93,7 +93,7 @@ def solve( * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` Strings are not case-sensitive. - workers : :class:`int`, default=1 + workers : :class:`int`, optional Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel processing of multiple right-hand sides. Parallelisation overhead can be significant for small systems. If set to ``-1``, the number of workers is @@ -119,7 +119,7 @@ def solve( mat_flat = np.asarray(mat, dtype=np.double) ptools._check_penta(mat_flat) elif is_flat: - mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy + mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy ptools._check_penta(mat_flat) ptools.shift_banded(mat_flat, copy=False) else: diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index 0e446b5..57d7f81 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -40,7 +40,7 @@ def diag_indices( n : :class:`int` The size, along each dimension, of the arrays for which the returned indices can be used. 
- offset : :class:`int`, default=0 + offset : :class:`int`, optional The diagonal offset. Default: 0 Returns @@ -101,14 +101,14 @@ def shift_banded( ---------- mat : :class:`numpy.ndarray` of shape (5, n) The Matrix or the flattened Version of the pentadiagonal matrix. - up : :class:`int`, default=2 + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int`, default=2 + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 - col_to_row : :class:`bool`, default=``True`` + col_to_row : :class:`bool`, optional Shift from column-wise to row-wise storage or vice versa. Default: ``True`` - copy : :class:`bool`, default=``True`` + copy : :class:`bool`, optional Copy the input matrix or overwrite it. Default: ``True`` Returns @@ -190,14 +190,14 @@ def create_banded( ---------- mat : :class:`numpy.ndarray` of shape (n, n) The full (n x n) Matrix. - up : :class:`int`, default=2 + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int`, default=2 + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 - col_wise : :class:`bool`, default=``True`` + col_wise : :class:`bool`, optional Use column-wise storage. If False, use row-wise storage. Default: ``True`` - dtype : :class:`type` or ``None``, default=``None`` + dtype : :class:`type` or ``None``, optional The data type of the returned matrix. If ``None``, the data type of the input matrix is preserved. Default: ``None`` @@ -286,11 +286,11 @@ def create_full( ---------- mat : :class:`numpy.ndarray` of shape (5, n) The flattened Matrix. - up : :class:`int`, default=2 + up : :class:`int`, optional The number of upper minor-diagonals. Default: 2 - low : :class:`int`, default=2 + low : :class:`int`, optional The number of lower minor-diagonals. Default: 2 - col_wise : :class:`bool`, default=``True`` + col_wise : :class:`bool`, optional Input is in column-wise storage. If False, use as row-wise storage. 
Default: ``True`` From e71ff9d4a26709d32359a9d33d1e6363da13dbc1 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 9 Jun 2024 21:36:35 +0200 Subject: [PATCH 46/62] docs: [11] fixed docstring and spelling inconsistencies --- src/pentapy/core.py | 2 +- src/pentapy/tools.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index af338c8..98b07cd 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -48,7 +48,7 @@ def solve( """ Solver for a pentadiagonal system. - The matrix can be given as a full n x n matrix or as a flattened one. + The matrix can be given as a full (n x n) matrix or as a flattened one. The flattened matrix can be given in a row-wise flattened form:: [[Dup2[0] Dup2[1] Dup2[2] ... Dup2[N-2] 0 0 ] diff --git a/src/pentapy/tools.py b/src/pentapy/tools.py index 57d7f81..fc5b29e 100644 --- a/src/pentapy/tools.py +++ b/src/pentapy/tools.py @@ -28,7 +28,6 @@ def diag_indices( offset: int = 0, ) -> Tuple[np.ndarray, np.ndarray]: """ - Get indices for the main or minor diagonals of a matrix. This returns a tuple of indices that can be used to access the main @@ -251,7 +250,8 @@ def create_full( low: int = 2, col_wise: bool = True, ) -> np.ndarray: - """Create a (n x n) Matrix from a given banded matrix. + """ + Create an (n x n) Matrix from a given banded matrix. The given Matrix has to be a flattened matrix. 
Either in a column-wise flattened form:: From aef7d0f23694e9c37895df185b2aebd01d7ef089 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 10 Jun 2024 13:33:50 +0200 Subject: [PATCH 47/62] feat: [11] enabled future possible validation of the quality of the solve on C-level --- src/pentapy/core.py | 4 ++- src/pentapy/solver.pxd | 4 +++ src/pentapy/solver.pyx | 58 ++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 98b07cd..6ed1aaa 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -169,11 +169,13 @@ def solve( ) # if there was only a 1D right-hand side, the result has to be flattened - sol = solver_func( + sol, info = solver_func( # NOTE: info is for potential future validation np.ascontiguousarray(mat_flat), np.ascontiguousarray(rhs), workers, + False, # NOTE: this can enable validation in the future ) + if single_rhs: sol = sol.ravel() diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index 4fe6c1e..2fe2843 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -3,10 +3,14 @@ cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, + bint validate, + int* info, ) cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, + bint validate, + int* info, ) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index fc2a9a5..53e8cd3 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -24,13 +24,23 @@ def penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, + bint validate, ): - return np.asarray( - c_penta_solver1( - mat_flat, - rhs, - workers, - ) + + # NOTE: info is defined to be overwritten for possible future validations + cdef int info + + return ( + np.asarray( + c_penta_solver1( + mat_flat, + rhs, + workers, + validate, + &info, + ) + ), + info, ) @@ -38,13 +48,23 @@ def penta_solver2( double[::, ::1] 
mat_flat, double[::, ::1] rhs, int workers, + bint validate, ): - return np.asarray( - c_penta_solver2( - mat_flat, - rhs, - workers, - ) + + # NOTE: info is defined to be overwritten for possible future validations + cdef int info + + return ( + np.asarray( + c_penta_solver2( + mat_flat, + rhs, + workers, + validate, + &info, + ) + ), + info, ) @@ -54,6 +74,8 @@ cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, + bint validate, + int* info, ): """ Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and @@ -78,7 +100,7 @@ cdef double[::, ::1] c_penta_solver1( # === Solving the system of equations === # first, the matrix is factorized - c_penta_factorize_algo1( + c_penta_factorize_algo_1( &mat_flat[0, 0], mat_n_cols, &mat_factorized[0, 0], @@ -98,10 +120,11 @@ cdef double[::, ::1] c_penta_solver1( &result[0, iter_col], ) + info[0] = 0 return result -cdef void c_penta_factorize_algo1( +cdef void c_penta_factorize_algo_1( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, @@ -312,6 +335,8 @@ cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, + bint validate, + int* info, ): """ Solves the pentadiagonal system of equations ``Ax = b`` with the matrix ``A`` and @@ -336,7 +361,7 @@ cdef double[::, ::1] c_penta_solver2( # === Solving the system of equations === # first, the matrix is factorized - c_penta_factorize_algo2( + c_penta_factorize_algo_2( &mat_flat[0, 0], mat_n_cols, &mat_factorized[0, 0], @@ -356,9 +381,10 @@ cdef double[::, ::1] c_penta_solver2( &result[0, iter_col], ) + info[0] = 0 return result -cdef void c_penta_factorize_algo2( +cdef void c_penta_factorize_algo_2( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, From 86ebf887cedd016d79dc27a7975da6228c982102 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 10 Jun 2024 14:03:01 +0200 Subject: [PATCH 48/62] refactor: [11] made internal `workers`-handling safer --- 
src/pentapy/core.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 6ed1aaa..87e6160 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -147,13 +147,21 @@ def solve( ) if workers == -1: - proc = psutil.Process() - workers = len(proc.cpu_affinity()) # type: ignore - del proc + # NOTE: the following will be overwritten by the number of available threads + workers = 999_999_999_999_999_999_999_999_999 elif workers == 0: workers = 1 + # the number of workers is limited to the number of available threads + proc = psutil.Process() + workers = min( + workers, + len(proc.cpu_affinity()), # type: ignore + ) + workers = max(workers, 1) + del proc + # if there is only a single right-hand side, it has to be reshaped to a 2D array # NOTE: this has to be reverted at the end single_rhs = rhs.ndim == 1 From b5f8b95e8b830e57d034270cd5a3cf0c193c695e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 10 Jun 2024 14:03:42 +0200 Subject: [PATCH 49/62] docs: [11] updated outdated `README` --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e5983a..33e7237 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,8 @@ Have a look at the script: [``examples/03_perform_simple.py``](https://github.co ## Requirements: -- [NumPy >= 1.14.5](https://www.numpy.org) +- [NumPy >= 1.20.0](https://www.numpy.org) +- [psutil >= 5.8.0](https://psutil.readthedocs.io/en/latest/) (for parallelisation) ### Optional From 87264d71f71af7a49cda27222cc4e166b96df9de Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 10 Jun 2024 18:35:04 +0200 Subject: [PATCH 50/62] refactor: [11] removed `validate` and leveraged `info`; reduced test load --- src/pentapy/core.py | 42 ++++++++++++++------------- src/pentapy/solver.pxd | 2 -- src/pentapy/solver.pyx | 64 ++++++++++++++++++++++++++++++++---------- tests/templates.py | 6 +--- 4 files changed, 73 insertions(+), 41 
deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 87e6160..f8134ac 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -169,29 +169,33 @@ def solve( if single_rhs: rhs = rhs[:, np.newaxis] - try: - solver_func = ( - psolver.penta_solver1 - if solver_inter == pmodels.PentaSolverAliases.PTRANS_I - else psolver.penta_solver2 - ) + # the respective solver is chosen ... + solver_func = ( + psolver.penta_solver1 + if solver_inter == pmodels.PentaSolverAliases.PTRANS_I + else psolver.penta_solver2 + ) - # if there was only a 1D right-hand side, the result has to be flattened - sol, info = solver_func( # NOTE: info is for potential future validation - np.ascontiguousarray(mat_flat), - np.ascontiguousarray(rhs), - workers, - False, # NOTE: this can enable validation in the future - ) + # ... and the solver is called + sol, info = solver_func( + np.ascontiguousarray(mat_flat), + np.ascontiguousarray(rhs), + workers, + ) - if single_rhs: - sol = sol.ravel() + # in case of failure, the solver will return NaNs and issue a warning + if info > 0: + warnings.warn( + f"pentapy: {solver_inter.name} solver encountered singular matrix at " + f"row index {info - 1}. Returning NaNs." 
+ ) + sol = np.full(shape=rhs_og_shape, fill_value=np.nan) - return sol + # in case of success, the solution can be returned (reshaped if necessary) + if single_rhs: + sol = sol.ravel() - except ZeroDivisionError: - warnings.warn("pentapy: PTRANS-I not suitable for input-matrix.") - return np.full(shape=rhs_og_shape, fill_value=np.nan) + return sol # Case 2: LAPACK's banded solver elif solver_inter == pmodels.PentaSolverAliases.LAPACK: diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index 2fe2843..879dcf6 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -3,7 +3,6 @@ cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, int* info, ) @@ -11,6 +10,5 @@ cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, int* info, ) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 53e8cd3..be860c8 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -1,4 +1,4 @@ -# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=False +# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True """ This is a solver linear equation systems with a penta-diagonal matrix, @@ -24,7 +24,6 @@ def penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, ): # NOTE: info is defined to be overwritten for possible future validations @@ -36,7 +35,6 @@ def penta_solver1( mat_flat, rhs, workers, - validate, &info, ) ), @@ -48,7 +46,6 @@ def penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, ): # NOTE: info is defined to be overwritten for possible future validations @@ -60,7 +57,6 @@ def penta_solver2( mat_flat, rhs, workers, - validate, &info, ) ), @@ -74,7 +70,6 @@ cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, int* info, ): """ @@ -100,12 
+95,16 @@ cdef double[::, ::1] c_penta_solver1( # === Solving the system of equations === # first, the matrix is factorized - c_penta_factorize_algo_1( + info[0] = c_penta_factorize_algo_1( &mat_flat[0, 0], mat_n_cols, &mat_factorized[0, 0], ) + # in case of a zero-division, the function exits early + if info[0] > 0: + return result + # then, all the right-hand sides are solved for iter_col in prange( rhs_n_cols, @@ -120,11 +119,10 @@ cdef double[::, ::1] c_penta_solver1( &result[0, iter_col], ) - info[0] = 0 return result -cdef void c_penta_factorize_algo_1( +cdef int c_penta_factorize_algo_1( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, @@ -169,11 +167,19 @@ cdef void c_penta_factorize_algo_1( # === Factorization === + # NOTE: in the following mu is manually checked for zero-division to extract the + # proper value of ``info`` and exit early in case of failure; + # ``info`` is set to the row count where the error occurred as for LAPACK ``pbtrf`` + # First row mu_i = mat_flat[mat_row_base_idx_2] + if mu_i == 0.0: + return 1 + al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i be_i_minus_1 = mat_flat[0] / mu_i + mat_factorized[0] = 0.0 mat_factorized[1] = mu_i mat_factorized[2] = 0.0 @@ -183,6 +189,9 @@ cdef void c_penta_factorize_algo_1( # Second row ga_i = mat_flat[mat_row_base_idx_3 + 1] mu_i = mat_flat[mat_row_base_idx_2 + 1] - al_i_minus_1 * ga_i + if mu_i == 0.0: + return 2 + al_i = (mat_flat[mat_row_base_idx_1 + 1] - be_i_minus_1 * ga_i) / mu_i be_i = mat_flat[1] / mu_i @@ -198,6 +207,8 @@ cdef void c_penta_factorize_algo_1( e_i = mat_flat[mat_row_base_idx_4 + iter_row] ga_i = mat_flat[mat_row_base_idx_3 + iter_row] - al_i_minus_1 * e_i mu_i = mat_flat[mat_row_base_idx_2 + iter_row] - be_i_minus_1 * e_i - al_i * ga_i + if mu_i == 0.0: + return iter_row + 1 al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + iter_row] - be_i * ga_i) / mu_i al_i_minus_1 = al_i @@ -219,6 +230,9 @@ cdef void c_penta_factorize_algo_1( e_i = mat_flat[mat_row_base_idx_4 +
mat_n_cols - 2] ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - al_i_minus_1 * e_i mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - be_i_minus_1 * e_i - al_i * ga_i + if mu_i == 0.0: + return mat_n_cols - 1 + al_i_plus_1 = (mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i mat_factorized[fact_curr_base_idx] = e_i @@ -231,6 +245,8 @@ cdef void c_penta_factorize_algo_1( e_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] ga_i = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] - al_i * e_i mu_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] - be_i * e_i - al_i_plus_1 * ga_i + if mu_i == 0.0: + return mat_n_cols mat_factorized[fact_curr_base_idx + 5] = e_i mat_factorized[fact_curr_base_idx + 6] = mu_i @@ -238,7 +254,7 @@ cdef void c_penta_factorize_algo_1( mat_factorized[fact_curr_base_idx + 8] = 0.0 mat_factorized[fact_curr_base_idx + 9] = 0.0 - return + return 0 cdef int c_solve_penta_from_factorize_algo_1( @@ -335,7 +351,6 @@ cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, int workers, - bint validate, int* info, ): """ @@ -361,11 +376,13 @@ cdef double[::, ::1] c_penta_solver2( # === Solving the system of equations === # first, the matrix is factorized - c_penta_factorize_algo_2( + info[0] = c_penta_factorize_algo_2( &mat_flat[0, 0], mat_n_cols, &mat_factorized[0, 0], ) + if info[0] > 0: + return result # then, all the right-hand sides are solved for iter_col in prange( @@ -381,10 +398,9 @@ cdef double[::, ::1] c_penta_solver2( &result[0, iter_col], ) - info[0] = 0 return result -cdef void c_penta_factorize_algo_2( +cdef int c_penta_factorize_algo_2( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, @@ -430,9 +446,16 @@ cdef void c_penta_factorize_algo_2( # === Factorization === + # NOTE: in the following ps is manually checked for zero-division to extract the + # proper value of ``info`` and exit early in case of failure; + # ``info`` is set to the row count where the error occurred as
for LAPACK ``pbtrf`` + # First row ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 1] + if ps_i == 0.0: + return mat_n_cols + si_i_plus_1 = mat_flat[mat_row_base_idx_3 + mat_n_cols - 1] / ps_i phi_i_plus_1 = mat_flat[mat_row_base_idx_4 + mat_n_cols - 1] / ps_i @@ -446,6 +469,9 @@ cdef void c_penta_factorize_algo_2( # Second row rho_i = mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] ps_i = mat_flat[mat_row_base_idx_2 + mat_n_cols - 2] - si_i_plus_1 * rho_i + if ps_i == 0.0: + return mat_n_cols - 1 + si_i = (mat_flat[mat_row_base_idx_3 + mat_n_cols - 2] - phi_i_plus_1 * rho_i) / ps_i phi_i = mat_flat[mat_row_base_idx_4 + mat_n_cols - 2] / ps_i @@ -461,6 +487,9 @@ cdef void c_penta_factorize_algo_2( b_i = mat_flat[iter_row] rho_i = mat_flat[mat_row_base_idx_1 + iter_row] - si_i_plus_1 * b_i ps_i = mat_flat[mat_row_base_idx_2 + iter_row] - phi_i_plus_1 * b_i - si_i * rho_i + if ps_i == 0.0: + return iter_row + 1 + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + iter_row] - phi_i * rho_i) / ps_i si_i_plus_1 = si_i si_i = si_i_minus_1 @@ -479,6 +508,9 @@ cdef void c_penta_factorize_algo_2( b_i = mat_flat[1] rho_i = mat_flat[mat_row_base_idx_1 + 1] - si_i_plus_1 * b_i ps_i = mat_flat[mat_row_base_idx_2 + 1] - phi_i_plus_1 * b_i - si_i * rho_i + if ps_i == 0.0: + return 2 + si_i_minus_1 = (mat_flat[mat_row_base_idx_3 + 1] - phi_i * rho_i) / ps_i si_i_plus_1 = si_i si_i = si_i_minus_1 @@ -493,6 +525,8 @@ cdef void c_penta_factorize_algo_2( b_i = mat_flat[0] rho_i = mat_flat[mat_row_base_idx_1 + 0] - si_i_plus_1 * b_i ps_i = mat_flat[mat_row_base_idx_2 + 0] - phi_i * b_i - si_i * rho_i + if ps_i == 0.0: + return 1 mat_factorized[4] = b_i mat_factorized[3] = rho_i @@ -500,7 +534,7 @@ cdef void c_penta_factorize_algo_2( mat_factorized[1] = 0.0 mat_factorized[0] = 0.0 - return + return 0 cdef int c_solve_penta_from_factorize_algo_2( diff --git a/tests/templates.py b/tests/templates.py index d67d6cc..0c35226 100644 --- a/tests/templates.py +++ b/tests/templates.py @@ -16,7 +16,7 
@@ # === Constants === SEED = 19_031_977 -REF_WARNING_CONTENT = "not suitable for input-matrix." +REF_WARNING_CONTENT = "singular matrix at row index" N_ROWS = [ 3, # important edge case 4, # important edge case @@ -27,12 +27,8 @@ 51, # odd 100, # ... 101, - 500, - 501, 1_000, 1_001, - 5_000, - 5_001, ] SOLVER_ALIASES_PTRANS_I = [1, "1", "pTrAnS-I"] SOLVER_ALIASES_PTRANS_II = [2, "2", "pTrAnS-Ii"] From 8db881fa3e2ae1cfd9874180b06c9a4132c17c98 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 10 Jun 2024 18:43:55 +0200 Subject: [PATCH 51/62] doc: [11] updated outdated index --- docs/source/index.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 8433404..83ae30f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -94,7 +94,8 @@ The performance plot was created with ``perfplot`` (`link = 1.14.5 `_ +- `Numpy >= 1.20.0 `_ +- `psutil >= 5.8.0 `_ Optional -------- From 37c2fbc615a2d4dea226d1bbf045a534546b8371 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 11 Jun 2024 20:18:24 +0200 Subject: [PATCH 52/62] refactor: [11] made Cython structure smarter for algorithm I --- src/pentapy/solver.pyx | 108 ++++++++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 22 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index be860c8..c6379e5 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -11,6 +11,8 @@ implemented in Cython. 
import numpy as np cimport numpy as np + +from cython cimport view from cython.parallel import prange from libc.stdint cimport int64_t @@ -83,35 +85,98 @@ cdef double[::, ::1] c_penta_solver1( """ - # === Variable declarations === + # --- Initial checks --- - cdef int64_t mat_n_cols = mat_flat.shape[1] - cdef int64_t rhs_n_cols = rhs.shape[1] - cdef int64_t iter_col + # if the number of columns in the flattened matrix is not equal to the number of + # rows in the right-hand side, the function exits early to avoid memory errors + if mat_flat.shape[1] != rhs.shape[0]: + info[0] = -1 + return np.empty_like(rhs) - cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols)) - cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, MAT_FACT_N_COLS)) - - # === Solving the system of equations === + # --- Solving the system of equations --- # first, the matrix is factorized - info[0] = c_penta_factorize_algo_1( - &mat_flat[0, 0], - mat_n_cols, - &mat_factorized[0, 0], + cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo_1( + mat_flat, + info, ) # in case of a zero-division, the function exits early if info[0] > 0: - return result + return np.empty_like(rhs) # then, all the right-hand sides are solved + return _c_interf_factorize_solve_algo_1( + mat_factorized, + rhs, + workers, + ) + + + +cdef double[::, ::1] _c_interf_factorize_algo_1( + double[::, ::1] mat_flat, + int* info, +): + """ + This function serves as the interface that takes the memoryview of the flattened + matrix and returns the freshly allocated factorized matrix. 
+ + """ + + # --- Variable declarations --- + + cdef int64_t mat_n_cols = mat_flat.shape[1] + tmp = view.array( + shape=(mat_n_cols, MAT_FACT_N_COLS), + itemsize=sizeof(double), + format="d", + ) + cdef double[::, ::1] mat_factorized = tmp + + # --- Factorization --- + + info[0] = _c_core_factorize_algo_1( + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], + ) + + return mat_factorized + + +cdef double[::, ::1] _c_interf_factorize_solve_algo_1( + double[::, ::1] mat_factorized, + double[::, ::1] rhs, + int workers, +): + """ + This function serves as the interface that takes the factorized matrix and the + right-hand sides and returns the freshly allocated solution vector obtained by + solving the system of equations via backward substitution. + + """ + + # --- Variable declarations --- + + cdef int64_t mat_n_cols = mat_factorized.shape[0] + cdef int64_t rhs_n_cols = rhs.shape[1] + cdef int64_t iter_col + tmp = view.array( + shape=(mat_n_cols, rhs_n_cols), + itemsize=sizeof(double), + format="d", + ) + cdef double[::, ::1] result = tmp + + # --- Solving the system of equations --- + for iter_col in prange( rhs_n_cols, nogil=True, num_threads=workers, ): - c_solve_penta_from_factorize_algo_1( + _c_core_factorize_solve_algo_1( mat_n_cols, &mat_factorized[0, 0], &rhs[0, iter_col], @@ -121,8 +186,7 @@ cdef double[::, ::1] c_penta_solver1( return result - -cdef int c_penta_factorize_algo_1( +cdef int _c_core_factorize_algo_1( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, @@ -154,7 +218,7 @@ cdef int c_penta_factorize_algo_1( """ - # === Variable declarations === + # --- Variable declarations --- cdef int64_t iter_row, fact_curr_base_idx cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row @@ -257,7 +321,7 @@ cdef int c_penta_factorize_algo_1( return 0 -cdef int c_solve_penta_from_factorize_algo_1( +cdef int _c_core_factorize_solve_algo_1( int64_t mat_n_cols, double* mat_factorized, double* rhs_single, @@ -272,7 +336,7 @@ 
cdef int c_solve_penta_from_factorize_algo_1( """ - # === Variable declarations === + # --- Variable declarations --- cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx cdef double ze_i, ze_i_minus_1, ze_i_plus_1 # zeta @@ -364,7 +428,7 @@ cdef double[::, ::1] c_penta_solver2( """ - # === Variable declarations === + # --- Variable declarations --- cdef int64_t mat_n_cols = mat_flat.shape[1] cdef int64_t rhs_n_cols = rhs.shape[1] @@ -433,7 +497,7 @@ cdef int c_penta_factorize_algo_2( """ - # === Variable declarations === + # --- Variable declarations --- cdef int64_t iter_row, fact_curr_base_idx cdef int64_t mat_row_base_idx_1 = mat_n_cols # base index for the second row @@ -553,7 +617,7 @@ cdef int c_solve_penta_from_factorize_algo_2( """ - # === Variable declarations === + # --- Variable declarations --- cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx cdef double om_i, om_i_minus_1, om_i_plus_1 # omega From 896b1ccd67666a7a7c34ce954c0e64d7d26f5203 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 11 Jun 2024 21:25:48 +0200 Subject: [PATCH 53/62] wip: [11] made use of Enums for checks; prepared for unifying solver I and II --- src/pentapy/solver.pyx | 77 +++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index c6379e5..f31debe 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -17,8 +17,19 @@ from cython.parallel import prange from libc.stdint cimport int64_t +# === Constants === + cdef enum: MAT_FACT_N_COLS = 5 +cdef enum Solvers: + PTRRANS_1 = 1 + PTRRANS_2 = 2 + +cdef enum Infos: + SUCCESS = 0 + SHAPE_MISMATCH = -1 + WRONG_SOLVER = -2 + # === Main Python Interface === @@ -90,33 +101,37 @@ cdef double[::, ::1] c_penta_solver1( # if the number of columns in the flattened matrix is not equal to the number of # rows in the right-hand side, the function exits early to avoid memory errors if mat_flat.shape[1] != rhs.shape[0]: - 
info[0] = -1 + info[0] = Infos.SHAPE_MISMATCH return np.empty_like(rhs) # --- Solving the system of equations --- # first, the matrix is factorized - cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo_1( + cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo( mat_flat, info, + Solvers.PTRRANS_1, ) - # in case of a zero-division, the function exits early - if info[0] > 0: + # in case of an error during factorization, the function exits early + if info[0] != Infos.SUCCESS: return np.empty_like(rhs) # then, all the right-hand sides are solved - return _c_interf_factorize_solve_algo_1( + return _c_interf_factorize_solve_algo( mat_factorized, rhs, workers, + info, + Solvers.PTRRANS_1, ) -cdef double[::, ::1] _c_interf_factorize_algo_1( +cdef double[::, ::1] _c_interf_factorize_algo( double[::, ::1] mat_flat, int* info, + int solver, ): """ This function serves as the interface that takes the memoryview of the flattened @@ -136,19 +151,28 @@ cdef double[::, ::1] _c_interf_factorize_algo_1( # --- Factorization --- - info[0] = _c_core_factorize_algo_1( - &mat_flat[0, 0], - mat_n_cols, - &mat_factorized[0, 0], - ) + # the solver algorithm is chosen based on the input parameter + # Case 1: PTRRANS-I + if solver == Solvers.PTRRANS_1: + info[0] = _c_core_factorize_algo_1( + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], + ) + + # Case 3: the wrong solver is chosen + else: + info[0] = Infos.WRONG_SOLVER return mat_factorized -cdef double[::, ::1] _c_interf_factorize_solve_algo_1( +cdef double[::, ::1] _c_interf_factorize_solve_algo( double[::, ::1] mat_factorized, double[::, ::1] rhs, int workers, + int* info, + int solver, ): """ This function serves as the interface that takes the factorized matrix and the @@ -171,18 +195,25 @@ cdef double[::, ::1] _c_interf_factorize_solve_algo_1( # --- Solving the system of equations --- - for iter_col in prange( - rhs_n_cols, - nogil=True, - num_threads=workers, - ): - _c_core_factorize_solve_algo_1( - 
mat_n_cols, - &mat_factorized[0, 0], - &rhs[0, iter_col], + # the solver algorithm is chosen based on the input parameter + # Case 1: PTRRANS-I + if solver == Solvers.PTRRANS_1: + for iter_col in prange( rhs_n_cols, - &result[0, iter_col], - ) + nogil=True, + num_threads=workers, + ): + info[0] = _c_core_factorize_solve_algo_1( + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, iter_col], + rhs_n_cols, + &result[0, iter_col], + ) + + # Case 3: the wrong solver is chosen + else: + info[0] = Infos.WRONG_SOLVER return result From 2a6daf4cda2eecafed6a4ff1eef273dbd4ef0536 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 11 Jun 2024 22:53:04 +0200 Subject: [PATCH 54/62] refactor/tests: [11] fully unified Cython implementations of the solvers I and II: - now, they share one common processing logic that - calls the solvers it needs depending on which solver was called Besides, a shape check was introduced and the `info`-variable was extended. All of this is now reflected in `core`, where the solvers and errors are now structured a bit better.
--- src/pentapy/_models.py | 21 ++- src/pentapy/core.py | 139 +++++++++++++----- src/pentapy/errors.py | 37 +++++ src/pentapy/solver.pyx | 107 ++++++++------ tests/templates.py | 178 ++++++++++++++++++------ tests/test_solvers_internal_parallel.py | 52 ++++++- tests/test_solvers_internal_serial.py | 53 ++++++- 7 files changed, 460 insertions(+), 127 deletions(-) create mode 100644 src/pentapy/errors.py diff --git a/src/pentapy/_models.py b/src/pentapy/_models.py index c75eb8c..16119cd 100644 --- a/src/pentapy/_models.py +++ b/src/pentapy/_models.py @@ -11,12 +11,29 @@ # === Models === +class Infos(IntEnum): + """ + Defines the possible returns for ``info`` of the low level pentadiagonal solvers, + namely + + - ``SUCCESS``: the solver has successfully solved the system + - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect + - ``WRONG_SOLVER``: the solver alias is incorrect on C-level + (internal error, should not occur) + + """ + + SUCCESS = 0 + SHAPE_MISMATCH = -1 + WRONG_SOLVER = -2 + + class PentaSolverAliases(IntEnum): """ Defines all available solver aliases for pentadiagonal systems, namely - - ``PTRANS_I``: The PTRANS-I algorithm - - ``PTRANS_II``: The PTRANS-II algorithm + - ``PTRANS_I``: the PTRANS-I algorithm + - ``PTRANS_II``: the PTRANS-II algorithm - ``LAPACK``: Scipy's LAPACK solver :func:`scipy.linalg.solve_banded` - ``SUPER_LU``: Scipy's SuperLU solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` - ``UMFPACK``: Scipy's UMFpack solver :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=True)` diff --git a/src/pentapy/core.py b/src/pentapy/core.py index f8134ac..da9fcbe 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -11,9 +11,52 @@ import psutil from pentapy import _models as pmodels +from pentapy import errors as perrors from pentapy import solver as psolver # type: ignore from pentapy import tools as ptools +# === Auxiliary functions === + + +def _get_num_workers(workers: int) -> int: +
""" + Gets the number of available workers for the solver. + + Parameters + ---------- + workers : :class:`int` + Number of workers requested. + + Returns + ------- + workers : :class:`int` + Number of workers available. + + """ + + if workers < -1: + raise ValueError( + perrors.PentaPyErrorMessages.WRONG_WORKERS.format(workers=workers) + ) + + if workers == -1: + # NOTE: the following will be overwritten by the number of available threads + workers = 999_999_999_999_999_999_999_999_999 + + # the number of workers is limited between 1 and the number of available threads + # NOTE: the following does not count the number of total threads, but the number of + # threads available for the solver + proc = psutil.Process() + workers = min( + workers, + len(proc.cpu_affinity()), # type: ignore + ) + workers = max(workers, 1) + del proc + + return workers + + # === Solver === @@ -104,6 +147,14 @@ def solve( result : :class:`numpy.ndarray` of shape (m,) or (m, n) Solution of the equation system with the same shape as ``rhs``. + Raises + ------ + ValueError + If the number of workers is incorrect. + ValueError + If there is a shape mismatch between the number of equations in the left-hand + side matrix and the number of right-hand sides. + """ # first, the solver is converted to the internal name to avoid confusion @@ -132,35 +183,28 @@ def solve( # NOTE: this avoids memory leakage in the Cython-solver that will iterate over # at least 4 rows/columns no matter what if mat_flat.shape[1] == 3: - return np.linalg.solve( - a=ptools.create_full(mat_flat, col_wise=False), - b=rhs, - ) + if not mat_flat.shape[1] == rhs.shape[0]: + raise ValueError( + perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( + lhs_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs.shape[0], + ) + ) + + try: + return np.linalg.solve( + a=ptools.create_full(mat_flat, col_wise=False), + b=rhs, + ) + except np.linalg.LinAlgError: + warnings.warn( + "pentapy: NumPy LAPACK dense solver encountered singular matrix." 
+ ) + return np.full(shape=rhs.shape, fill_value=np.nan) # now, the number of workers for multithreading has to be determined if # necessary - # NOTE: the following does not count the number of total threads, but the number - # of threads available for the solver - if workers < -1: - raise ValueError( - f"pentapy.solve: workers has to be -1 or greater, not {workers=}" - ) - - if workers == -1: - # NOTE: the following will be overwritten by the number of available threads - workers = 999_999_999_999_999_999_999_999_999 - - elif workers == 0: - workers = 1 - - # the number of workers is limited to the number of available threads - proc = psutil.Process() - workers = min( - workers, - len(proc.cpu_affinity()), # type: ignore - ) - workers = max(workers, 1) - del proc + workers = _get_num_workers(workers) # if there is only a single right-hand side, it has to be reshaped to a 2D array # NOTE: this has to be reverted at the end @@ -183,19 +227,44 @@ def solve( workers, ) - # in case of failure, the solver will return NaNs and issue a warning - if info > 0: + print(f"{info=}") + + # in case of success, the solution can be returned (reshaped if necessary) + if info == pmodels.Infos.SUCCESS: + if single_rhs: + sol = sol.ravel() + + return sol + + # in case of a shape mismatch, an error will be raised + if info == pmodels.Infos.SHAPE_MISMATCH: + raise ValueError( + perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( + lhs_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs_og_shape[0], + ) + ) + + # in case of a zero-division, the solver will return NaNs and issue a warning + elif info > pmodels.Infos.SUCCESS: warnings.warn( - f"pentapy: {solver_inter.name} solver encountered singular matrix at " - f"row index {info - 1}. Returning NaNs." 
+ perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format( + solver_inter_name=solver_inter.name, + row_idx=info - 1, + ) ) - sol = np.full(shape=rhs_og_shape, fill_value=np.nan) - # in case of success, the solution can be returned (reshaped if necessary) - if single_rhs: - sol = sol.ravel() + return np.full(shape=rhs_og_shape, fill_value=np.nan) - return sol + # in case of an internal error in determination of the solver, an error will be + # raised + elif info == pmodels.Infos.WRONG_SOLVER: # pragma: no cover + raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER) + + # in case of an unknown error, an error will be raised + raise AssertionError( # pragma: no cover + perrors.PentaPyErrorMessages.UNKNOWN_ERROR + ) # Case 2: LAPACK's banded solver elif solver_inter == pmodels.PentaSolverAliases.LAPACK: diff --git a/src/pentapy/errors.py b/src/pentapy/errors.py new file mode 100644 index 0000000..0974816 --- /dev/null +++ b/src/pentapy/errors.py @@ -0,0 +1,37 @@ +""" +Auxiliary errors for the pentapy package. + +""" + +# === Imports === + +from enum import Enum + + +class PentaPyErrorMessages(str, Enum): + """ + Defines the possible error messages for the pentapy package, namely + + - ``WRONG_WORKERS``: the number of workers is incorrect + - ``SINGULAR_MATRIX``: the matrix is singular + - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect + - ``WRONG_SOLVER``: the solver alias is incorrect on C-level (internal error, + should not occur) + - ``UNKNOWN_ERROR``: an unknown error occurred + + """ + + WRONG_WORKERS = ( + "pentapy.solve: workers has to be -1 or greater, but got workers={workers}" + ) + SINGULAR_MATRIX = ( + "pentapy: {solver_inter_name} solver encountered singular matrix at " + "row index {row_idx}. Returning NaNs." + ) + SHAPE_MISMATCH = ( + "pentapy.solve: shape mismatch between the number of equations in the " + "left-hand side matrix ({lhs_n_cols}) and the number of right-hand sides " + "({rhs_n_rows})." 
+ ) + WRONG_SOLVER = "pentapy.solve: failure in determining the solver internally." + UNKNOWN_ERROR = "pentapy.solve: unknown error in the pentadiagonal solver." diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index f31debe..32fdf77 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -107,7 +107,7 @@ cdef double[::, ::1] c_penta_solver1( # --- Solving the system of equations --- # first, the matrix is factorized - cdef double[::, ::1] mat_factorized = _c_interf_factorize_algo( + cdef double[::, ::1] mat_factorized = _c_interf_factorize( mat_flat, info, Solvers.PTRRANS_1, @@ -118,7 +118,7 @@ cdef double[::, ::1] c_penta_solver1( return np.empty_like(rhs) # then, all the right-hand sides are solved - return _c_interf_factorize_solve_algo( + return _c_interf_factorize_solve( mat_factorized, rhs, workers, @@ -128,7 +128,7 @@ cdef double[::, ::1] c_penta_solver1( -cdef double[::, ::1] _c_interf_factorize_algo( +cdef double[::, ::1] _c_interf_factorize( double[::, ::1] mat_flat, int* info, int solver, @@ -159,15 +159,24 @@ cdef double[::, ::1] _c_interf_factorize_algo( mat_n_cols, &mat_factorized[0, 0], ) + return mat_factorized + + # Case 2: PTRRANS-II + elif solver == Solvers.PTRRANS_2: + info[0] = _c_core_factorize_algo_2( + &mat_flat[0, 0], + mat_n_cols, + &mat_factorized[0, 0], + ) + return mat_factorized # Case 3: the wrong solver is chosen else: info[0] = Infos.WRONG_SOLVER - - return mat_factorized + return mat_factorized -cdef double[::, ::1] _c_interf_factorize_solve_algo( +cdef double[::, ::1] _c_interf_factorize_solve( double[::, ::1] mat_factorized, double[::, ::1] rhs, int workers, @@ -211,11 +220,30 @@ cdef double[::, ::1] _c_interf_factorize_solve_algo( &result[0, iter_col], ) + return result + + # Case 2: PTRRANS-II + elif solver == Solvers.PTRRANS_2: + for iter_col in prange( + rhs_n_cols, + nogil=True, + num_threads=workers, + ): + info[0] = _c_core_factorize_solve_algo_2( + mat_n_cols, + &mat_factorized[0, 0], + &rhs[0, 
iter_col], + rhs_n_cols, + &result[0, iter_col], + ) + + return result + # Case 3: the wrong solver is chosen else: info[0] = Infos.WRONG_SOLVER + return result - return result cdef int _c_core_factorize_algo_1( double* mat_flat, @@ -260,7 +288,7 @@ cdef int _c_core_factorize_algo_1( cdef double al_i, al_i_minus_1, al_i_plus_1 # alpha cdef double be_i, be_i_minus_1, be_i_plus_1 # beta - # === Factorization === + # --- Factorization --- # NOTE: in the following mu is manually checked for zero-division to extract the # proper value of ``info`` and exit early in case of failure; @@ -372,7 +400,7 @@ cdef int _c_core_factorize_solve_algo_1( cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx cdef double ze_i, ze_i_minus_1, ze_i_plus_1 # zeta - # === Transformation === + # --- Transformation --- # first, the right-hand side is transformed into the vector ``zeta`` # First row @@ -419,7 +447,7 @@ cdef int _c_core_factorize_solve_algo_1( ) / mat_factorized[fact_curr_base_idx + 6] result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1 - # === Backward substitution === + # --- Backward substitution --- # The solution vector is calculated by backward substitution that overwrites the # right-hand side vector with the solution vector @@ -459,43 +487,38 @@ cdef double[::, ::1] c_penta_solver2( """ - # --- Variable declarations --- - - cdef int64_t mat_n_cols = mat_flat.shape[1] - cdef int64_t rhs_n_cols = rhs.shape[1] - cdef int64_t iter_col + # --- Initial checks --- - cdef double[::, ::1] result = np.empty(shape=(mat_n_cols, rhs_n_cols)) - cdef double[::, ::1] mat_factorized = np.empty(shape=(mat_n_cols, 5)) + # if the number of columns in the flattened matrix is not equal to the number of + # rows in the right-hand side, the function exits early to avoid memory errors + if mat_flat.shape[1] != rhs.shape[0]: + info[0] = Infos.SHAPE_MISMATCH + return np.empty_like(rhs) - # === Solving the system of equations === + # --- Solving the system of equations --- # first, 
the matrix is factorized - info[0] = c_penta_factorize_algo_2( - &mat_flat[0, 0], - mat_n_cols, - &mat_factorized[0, 0], + cdef double[::, ::1] mat_factorized = _c_interf_factorize( + mat_flat, + info, + Solvers.PTRRANS_2, ) - if info[0] > 0: - return result + + # in case of an error during factorization, the function exits early + if info[0] != Infos.SUCCESS: + return np.empty_like(rhs) # then, all the right-hand sides are solved - for iter_col in prange( - rhs_n_cols, - nogil=True, - num_threads=workers, - ): - c_solve_penta_from_factorize_algo_2( - mat_n_cols, - &mat_factorized[0, 0], - &rhs[0, iter_col], - rhs_n_cols, - &result[0, iter_col], - ) + return _c_interf_factorize_solve( + mat_factorized, + rhs, + workers, + info, + Solvers.PTRRANS_2, + ) - return result -cdef int c_penta_factorize_algo_2( +cdef int _c_core_factorize_algo_2( double* mat_flat, int64_t mat_n_cols, double* mat_factorized, @@ -539,7 +562,7 @@ cdef int c_penta_factorize_algo_2( cdef double si_i, si_i_minus_1, si_i_plus_1 # sigma cdef double phi_i, phi_i_minus_1, phi_i_plus_1 # phi - # === Factorization === + # --- Factorization --- # NOTE: in the following ps is manually checked for zero-division to extract the # proper value of ``info`` and exit early in case of failure; @@ -632,7 +655,7 @@ cdef int c_penta_factorize_algo_2( return 0 -cdef int c_solve_penta_from_factorize_algo_2( +cdef int _c_core_factorize_solve_algo_2( int64_t mat_n_cols, double* mat_factorized, double* rhs_single, @@ -653,7 +676,7 @@ cdef int c_solve_penta_from_factorize_algo_2( cdef int64_t iter_row, fact_curr_base_idx, res_curr_base_idx cdef double om_i, om_i_minus_1, om_i_plus_1 # omega - # === Transformation === + # --- Transformation --- # first, the right-hand side is transformed into the vector ``omega`` # First row @@ -708,7 +731,7 @@ cdef int c_solve_penta_from_factorize_algo_2( ) / mat_factorized[2] result_view[0] = om_i_minus_1 - # === Forward substitution === + # --- Forward substitution --- # The solution 
vector is calculated by forward substitution that overwrites the # right-hand side vector with the solution vector diff --git a/tests/templates.py b/tests/templates.py index 0c35226..2be97e1 100644 --- a/tests/templates.py +++ b/tests/templates.py @@ -5,18 +5,18 @@ # === Imports === -from typing import Literal +from typing import Dict, Literal import numpy as np +import pentapy as pp import pytest import util_funcs as uf -import pentapy as pp - # === Constants === SEED = 19_031_977 -REF_WARNING_CONTENT = "singular matrix at row index" +SINGULAR_WARNING_REF_CONTENT = "singular matrix at row index" +SHAPE_MISMATCH_ERROR_REF_CONTENT = "shape mismatch between the number of equations" N_ROWS = [ 3, # important edge case 4, # important edge case @@ -43,10 +43,69 @@ "workers": [1], } +# === Auxiliary functions === + + +def convert_matrix_to_layout( + mat: np.ndarray, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], +) -> tuple[np.ndarray, Dict[str, bool]]: + """ + Converts a dense pentadiagonal matrix to the desired layout. + + """ + + if input_layout == "full": + return ( + mat, + dict(is_flat=False), + ) + + elif input_layout == "banded_row_wise": + return ( + pp.create_banded(mat, col_wise=False), + dict( + is_flat=True, + index_row_wise=True, + ), + ) + + elif input_layout == "banded_col_wise": + return ( + pp.create_banded(mat, col_wise=True), + dict( + is_flat=True, + index_row_wise=False, + ), + ) + + else: + raise ValueError(f"Invalid input layout: {input_layout}") + + +def convert_matrix_to_order( + mat: np.ndarray, + from_order: Literal["C", "F"], +) -> np.ndarray: + """ + Converts a dense pentadiagonal matrix to the desired order. 
+ + """ + + if from_order == "C": + return np.ascontiguousarray(mat) + + elif from_order == "F": + return np.asfortranarray(mat) + + else: + raise ValueError(f"Invalid from order: {from_order=}") + + # === Templates === -def pentapy_solvers_template( +def pentapy_solvers_extended_template( n_rows: int, n_rhs: int, input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], @@ -65,9 +124,9 @@ def pentapy_solvers_template( workers: int, ) -> None: """ - Tests the pentadiagonal solver based on Algorithm PTRANS-I when starting from - different input layouts, number of right-hand sides, number of rows, and also - when inducing an error by making the first diagonal element zero. + Tests the pentadiagonal solvers when starting from different input layouts, number + of right-hand sides, number of rows, and also when inducing an error by making the + first or last diagonal element exactly zero. It has to be ensured that the edge case of ``n_rows = 3`` is also covered. """ @@ -104,42 +163,17 @@ def pentapy_solvers_template( result_shape = (n_rows,) # the matrix is converted to the desired layout - if input_layout == "full": - mat = mat_full - kwargs = dict(is_flat=False) - - elif input_layout == "banded_row_wise": - mat = pp.create_banded(mat_full, col_wise=False) - kwargs = dict( - is_flat=True, - index_row_wise=True, - ) - - elif input_layout == "banded_col_wise": - mat = pp.create_banded(mat_full, col_wise=True) - kwargs = dict( - is_flat=True, - index_row_wise=False, - ) - - else: - raise ValueError(f"Invalid input layout: {input_layout}") + mat, kwargs = convert_matrix_to_layout(mat_full, input_layout) - # the matrix is converted to the desired order - if from_order == "C": - mat = np.ascontiguousarray(mat) - rhs = np.ascontiguousarray(rhs) - elif from_order == "F": - mat = np.asfortranarray(mat) - rhs = np.asfortranarray(rhs) - else: - raise ValueError(f"Invalid from order: {from_order=}") + # the left-hand side matrix and right-hand side is converted to the 
desired order + mat = convert_matrix_to_order(mat=mat, from_order=from_order) + rhs = convert_matrix_to_order(mat=rhs, from_order=from_order) # the solution is computed # Case 1: in case of an error, a warning has to be issued and the result has to # be NaN if induce_error: - with pytest.warns(UserWarning, match=REF_WARNING_CONTENT): + with pytest.warns(UserWarning, match=SINGULAR_WARNING_REF_CONTENT): mat_ref_copy = mat.copy() sol = pp.solve( mat=mat, @@ -148,9 +182,10 @@ def pentapy_solvers_template( workers=workers, **kwargs, ) - assert sol.shape == result_shape - assert np.isnan(sol).all() - assert np.array_equal(mat, mat_ref_copy) + + assert sol.shape == result_shape + assert np.isnan(sol).all() + assert np.array_equal(mat, mat_ref_copy) return @@ -174,3 +209,64 @@ def pentapy_solvers_template( # the solutions are compared assert np.allclose(sol, sol_ref) + + return + + +def pentapy_solvers_shape_mismatch_template( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + workers: int, +) -> None: + """ + Tests the pentadiagonal solvers when the shape of the right-hand side is incorrect, + starting from different input layouts, number of right-hand sides, and number of + rows. 
+ + """ + + # first, a random pentadiagonal matrix is generated + mat_full = uf.gen_conditioned_rand_penta_matrix_dense( + n_rows=n_rows, + seed=SEED, + ill_conditioned=False, + ) + + # the right-hand side is generated with a wrong shape (rows + 10) + np.random.seed(SEED) + if n_rhs is not None: + rhs = np.random.rand(n_rows + 10, n_rhs) + else: + rhs = np.random.rand(n_rows + 10) + + # the matrix is converted to the desired layout + mat, kwargs = convert_matrix_to_layout(mat_full, input_layout) + + # the left-hand side matrix and right-hand side is converted to the desired order + mat = convert_matrix_to_order(mat=mat, from_order=from_order) + rhs = convert_matrix_to_order(mat=rhs, from_order=from_order) + + # the solution is computed, but due to the wrong shape of the right-hand side, an + # error has to be raised + with pytest.raises(ValueError, match=SHAPE_MISMATCH_ERROR_REF_CONTENT): + pp.solve( + mat=mat, + rhs=rhs, + solver=solver_alias, # type: ignore + workers=workers, + **kwargs, + ) + + return diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py index d091d8d..3775ffa 100644 --- a/tests/test_solvers_internal_parallel.py +++ b/tests/test_solvers_internal_parallel.py @@ -22,6 +22,8 @@ param_dict["from_order"] = ["C"] param_dict["workers"] = [-1] +# --- Extended solve test --- + def test_pentapy_solvers_parallel( n_rows: int, @@ -42,7 +44,7 @@ def test_pentapy_solvers_parallel( workers: int, ) -> None: - templates.pentapy_solvers_template( + templates.pentapy_solvers_extended_template( n_rows=n_rows, n_rhs=n_rhs, input_layout=input_layout, @@ -59,6 +61,9 @@ def test_pentapy_solvers_parallel( ) +# --- Different workers test --- + + @pytest.mark.parametrize( "workers, expected", [(0, None), (1, None), (-1, None), (-2, ValueError)] ) @@ -83,9 +88,50 @@ def test_pentapy_solvers_parallel_different_workers( # Case 1: the test should fail if expected is not None: with pytest.raises(expected): - 
templates.pentapy_solvers_template(**kwargs) # type: ignore + templates.pentapy_solvers_extended_template(**kwargs) # type: ignore return # Case 2: the test should pass - templates.pentapy_solvers_template(**kwargs) # type: ignore + templates.pentapy_solvers_extended_template(**kwargs) # type: ignore + + +# --- Shape mismatch test --- + + +def test_pentapy_solvers_shape_mismatch_parallel( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + workers: int, +) -> None: + + templates.pentapy_solvers_shape_mismatch_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + from_order=from_order, + workers=workers, + ) + + +params_dict_without_induce_error = deepcopy(templates.PARAM_DICT) +params_dict_without_induce_error["workers"] = [-1] +params_dict_without_induce_error.pop("induce_error") + +for key, value in params_dict_without_induce_error.items(): + test_pentapy_solvers_shape_mismatch_parallel = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_shape_mismatch_parallel + ) diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py index 6bed962..f1248f8 100644 --- a/tests/test_solvers_internal_serial.py +++ b/tests/test_solvers_internal_serial.py @@ -8,6 +8,7 @@ # === Imports === +from copy import deepcopy from typing import Literal import pytest @@ -19,7 +20,10 @@ # based on either Algorithm PTRANS-I or PTRANS-II in serial mode -def test_pentapy_solvers_serial( +# --- Extended solve test --- + + +def test_pentapy_solvers_extended_serial( n_rows: int, n_rhs: int, input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], @@ -38,7 +42,7 @@ def test_pentapy_solvers_serial( workers: int, ) -> None: - templates.pentapy_solvers_template( + templates.pentapy_solvers_extended_template( 
n_rows=n_rows, n_rhs=n_rhs, input_layout=input_layout, @@ -50,6 +54,47 @@ def test_pentapy_solvers_serial( for key, value in templates.PARAM_DICT.items(): - test_pentapy_solvers_serial = pytest.mark.parametrize(key, value)( - test_pentapy_solvers_serial + test_pentapy_solvers_extended_serial = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_extended_serial + ) + + +# --- Shape mismatch test --- + + +def test_pentapy_solvers_shape_mismatch_serial( + n_rows: int, + n_rhs: int, + input_layout: Literal["full", "banded_row_wise", "banded_col_wise"], + solver_alias: Literal[ + 1, + "1", + "PTRANS-I", + "pTrAnS-I", + 2, + "2", + "PTRANS-II", + "pTrAnS-Ii", + ], + from_order: Literal["C", "F"], + workers: int, +) -> None: + + templates.pentapy_solvers_shape_mismatch_template( + n_rows=n_rows, + n_rhs=n_rhs, + input_layout=input_layout, + solver_alias=solver_alias, + from_order=from_order, + workers=workers, + ) + + +params_dict_without_induce_error = deepcopy(templates.PARAM_DICT) +params_dict_without_induce_error.pop("induce_error") + + +for key, value in params_dict_without_induce_error.items(): + test_pentapy_solvers_shape_mismatch_serial = pytest.mark.parametrize(key, value)( + test_pentapy_solvers_shape_mismatch_serial ) From 549422d13fbb749a98e70abc5c307dbf8191d25c Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 11 Jun 2024 23:14:17 +0200 Subject: [PATCH 55/62] refactor/tests: [11] split up cluttered ptrans-solving into dedicated auxiliary functions in `core`; added NumPy-solver error case test --- src/pentapy/core.py | 265 +++++++++++++++++++++++++++----------------- tests/templates.py | 11 +- 2 files changed, 171 insertions(+), 105 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index da9fcbe..df1d152 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -57,7 +57,165 @@ def _get_num_workers(workers: int) -> int: return workers -# === Solver === +def _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols: int, + 
rhs_n_rows: int, +) -> None: + """ + Raises a shape mismatch error for the PTRANS solver or the NumPy dense solver. + + """ + + raise ValueError( + perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( + lhs_n_cols=mat_n_cols, + rhs_n_rows=rhs_n_rows, + ) + ) + + +def _handle_ptrans_info_complete_fail_cases( + info: int, + mat_n_cols: int, + rhs_n_rows: int, +) -> None: + """ + Handles the cases where the PTRANS solver by raising the appropriate error. + + """ + + # Case 1: shape mismatch + if info == pmodels.Infos.SHAPE_MISMATCH: + _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols=mat_n_cols, + rhs_n_rows=rhs_n_rows, + ) + + # Case 2: wrong solver + elif info == pmodels.Infos.WRONG_SOLVER: # pragma: no cover + raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER) + + # Case 3: unknown error + # pragma: no cover + raise AssertionError(perrors.PentaPyErrorMessages.UNKNOWN_ERROR) + + +# === Auxiliary Solver Interfaces === + + +def _solve_with_numpy( + mat_flat: np.ndarray, + rhs: np.ndarray, +) -> np.ndarray: + """ + Solver for a pentadiagonal system using NumPy's dense LAPACK solver. 
+ + """ + + # in case of a shape mismatch, an error will be raised + if not mat_flat.shape[1] == rhs.shape[0]: + _raise_ptrans_or_numpy_shape_mismatch_error( + mat_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs.shape[0], + ) + + # then, the system is solved using NumPy's dense solver + try: + return np.linalg.solve( + a=ptools.create_full(mat_flat, col_wise=False), + b=rhs, + ) + + # in case of a singular matrix, a warning will be issued and NaNs will be returned + except np.linalg.LinAlgError: + warnings.warn("pentapy: NumPy LAPACK dense solver encountered singular matrix.") + return np.full(shape=rhs.shape, fill_value=np.nan) + + +def _solve_with_ptrans( + mat: np.ndarray, + rhs: np.ndarray, + is_flat: bool, + index_row_wise: bool, + workers: int, + solver_inter: pmodels.PentaSolverAliases, +) -> np.ndarray: # type: ignore + """ + Solver for a pentadiagonal system using one of the PTRANS algorithms. + + """ + + # the matrix is checked and shifted if necessary ... + if is_flat and index_row_wise: + mat_flat = np.asarray(mat, dtype=np.double) + ptools._check_penta(mat_flat) + elif is_flat: + mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy + ptools._check_penta(mat_flat) + ptools.shift_banded(mat_flat, copy=False) + else: + mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) + + # ... 
followed by the conversion of the right-hand side + rhs = np.asarray(rhs, dtype=np.double) + + # Special case: Early exit when the matrix has only 3 rows/columns + # NOTE: this avoids memory leakage in the Cython-solver that will iterate over + # at least 4 rows/columns no matter what + if mat_flat.shape[1] == 3: + return _solve_with_numpy(mat_flat=mat_flat, rhs=rhs) + + # now, the number of workers for multithreading has to be determined if necessary + workers = _get_num_workers(workers) + + # if there is only a single right-hand side, it has to be reshaped to a 2D array + # NOTE: this has to be reverted at the end + single_rhs = rhs.ndim == 1 + rhs_og_shape = rhs.shape + if single_rhs: + rhs = rhs[:, np.newaxis] + + # the respective solver is chosen ... + solver_func = ( + psolver.penta_solver1 + if solver_inter == pmodels.PentaSolverAliases.PTRANS_I + else psolver.penta_solver2 + ) + + # ... and the solver is called + sol, info = solver_func( + np.ascontiguousarray(mat_flat), + np.ascontiguousarray(rhs), + workers, + ) + + # in case of success, the solution can be returned (reshaped if necessary) + if info == pmodels.Infos.SUCCESS: + if single_rhs: + sol = sol.ravel() + + return sol + + # in case of a singular matrix, a warning will be issued and NaNs will be returned + elif info > pmodels.Infos.SUCCESS: + warnings.warn( + perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format( + solver_inter_name=pmodels.PentaSolverAliases.PTRANS_I.name, + row_idx=info - 1, + ) + ) + + return np.full(shape=rhs_og_shape, fill_value=np.nan) + + # in case of an error, the respective error will be raised + _handle_ptrans_info_complete_fail_cases( + info=info, + mat_n_cols=mat_flat.shape[1], + rhs_n_rows=rhs_og_shape[0], + ) + + +# === Main Solver Interface === def solve( @@ -165,105 +323,14 @@ def solve( pmodels.PentaSolverAliases.PTRANS_I, pmodels.PentaSolverAliases.PTRANS_II, }: - # the matrix is checked and shifted if necessary ... 
- if is_flat and index_row_wise: - mat_flat = np.asarray(mat, dtype=np.double) - ptools._check_penta(mat_flat) - elif is_flat: - mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy - ptools._check_penta(mat_flat) - ptools.shift_banded(mat_flat, copy=False) - else: - mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) - - # ... followed by the conversion of the right-hand side - rhs = np.asarray(rhs, dtype=np.double) - - # Special case: Early exit when the matrix has only 3 rows/columns - # NOTE: this avoids memory leakage in the Cython-solver that will iterate over - # at least 4 rows/columns no matter what - if mat_flat.shape[1] == 3: - if not mat_flat.shape[1] == rhs.shape[0]: - raise ValueError( - perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( - lhs_n_cols=mat_flat.shape[1], - rhs_n_rows=rhs.shape[0], - ) - ) - - try: - return np.linalg.solve( - a=ptools.create_full(mat_flat, col_wise=False), - b=rhs, - ) - except np.linalg.LinAlgError: - warnings.warn( - "pentapy: NumPy LAPACK dense solver encountered singular matrix." - ) - return np.full(shape=rhs.shape, fill_value=np.nan) - - # now, the number of workers for multithreading has to be determined if - # necessary - workers = _get_num_workers(workers) - - # if there is only a single right-hand side, it has to be reshaped to a 2D array - # NOTE: this has to be reverted at the end - single_rhs = rhs.ndim == 1 - rhs_og_shape = rhs.shape - if single_rhs: - rhs = rhs[:, np.newaxis] - - # the respective solver is chosen ... - solver_func = ( - psolver.penta_solver1 - if solver_inter == pmodels.PentaSolverAliases.PTRANS_I - else psolver.penta_solver2 - ) - - # ... 
and the solver is called - sol, info = solver_func( - np.ascontiguousarray(mat_flat), - np.ascontiguousarray(rhs), - workers, - ) - - print(f"{info=}") - - # in case of success, the solution can be returned (reshaped if necessary) - if info == pmodels.Infos.SUCCESS: - if single_rhs: - sol = sol.ravel() - - return sol - - # in case of a shape mismatch, an error will be raised - if info == pmodels.Infos.SHAPE_MISMATCH: - raise ValueError( - perrors.PentaPyErrorMessages.SHAPE_MISMATCH.format( - lhs_n_cols=mat_flat.shape[1], - rhs_n_rows=rhs_og_shape[0], - ) - ) - - # in case of a zero-division, the solver will return NaNs and issue a warning - elif info > pmodels.Infos.SUCCESS: - warnings.warn( - perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format( - solver_inter_name=solver_inter.name, - row_idx=info - 1, - ) - ) - - return np.full(shape=rhs_og_shape, fill_value=np.nan) - - # in case of an internal error in determination of the solver, an error will be - # raised - elif info == pmodels.Infos.WRONG_SOLVER: # pragma: no cover - raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER) - # in case of an unknown error, an error will be raised - raise AssertionError( # pragma: no cover - perrors.PentaPyErrorMessages.UNKNOWN_ERROR + return _solve_with_ptrans( + mat=mat, + rhs=rhs, + is_flat=is_flat, + index_row_wise=index_row_wise, + workers=workers, + solver_inter=solver_inter, ) # Case 2: LAPACK's banded solver diff --git a/tests/templates.py b/tests/templates.py index 2be97e1..6d195a1 100644 --- a/tests/templates.py +++ b/tests/templates.py @@ -15,7 +15,7 @@ # === Constants === SEED = 19_031_977 -SINGULAR_WARNING_REF_CONTENT = "singular matrix at row index" +SINGULAR_WARNING_REF_CONTENT = "solver encountered singular matrix" SHAPE_MISMATCH_ERROR_REF_CONTENT = "shape mismatch between the number of equations" N_ROWS = [ 3, # important edge case @@ -129,6 +129,8 @@ def pentapy_solvers_extended_template( first or last diagonal element exactly zero. 
It has to be ensured that the edge case of ``n_rows = 3`` is also covered. + For ``n_rows = 3``, the error is induced by initialising a matrix of zeros. + """ # first, a random pentadiagonal matrix is generated @@ -143,12 +145,9 @@ def pentapy_solvers_extended_template( # the induction of the error is only possible if the matrix does not have # only 3 rows if n_rows == 3: - pytest.skip( - "Only 3 rows, cannot induce error because this will not go into " - "PTRANS-I, but NumPy." - ) + mat_full = np.zeros_like(mat_full) - if solver_alias in SOLVER_ALIASES_PTRANS_I: + elif solver_alias in SOLVER_ALIASES_PTRANS_I: mat_full[0, 0] = 0.0 else: mat_full[n_rows - 1, n_rows - 1] = 0.0 From a31fe718794a53ead17357670f7186adb917ce48 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 11 Jun 2024 23:29:21 +0200 Subject: [PATCH 56/62] fix: [11] fixed broken coverage of unknown error (was not skipped by pragma) --- src/pentapy/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index df1d152..8607972 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -96,8 +96,10 @@ def _handle_ptrans_info_complete_fail_cases( raise AssertionError(perrors.PentaPyErrorMessages.WRONG_SOLVER) # Case 3: unknown error - # pragma: no cover - raise AssertionError(perrors.PentaPyErrorMessages.UNKNOWN_ERROR) + + raise AssertionError( # pragma: no cover + perrors.PentaPyErrorMessages.UNKNOWN_ERROR, + ) # === Auxiliary Solver Interfaces === From ac5759b48e5c26297c8f2e69e4163232f341ca23 Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 12 Jun 2024 07:39:03 +0200 Subject: [PATCH 57/62] refactor: [11] made mu occupy the central column of the factorized matrix Reason: it is the main diagonal of the matrix L for A = LU just like ps is the main diagonal of U for PTRANS-II. So now, the main diagonals of the non unit triangular factors are always in the central column which makes the most sense. 
--- src/pentapy/solver.pyx | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 32fdf77..58cd00f 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -262,14 +262,14 @@ cdef int _c_core_factorize_algo_1( They are overwriting the memoryview ``mat_factorized`` as follows: ```bash - [[ * mu_0 * al_0 be_0 ] - [ * mu_1 ga_1 al_1 be_1 ] - [ e_2 mu_2 ga_2 al_2 be_2 ] + [[ * * mu_0 al_0 be_0 ] + [ * ga_1 mu_1 al_1 be_1 ] + [ e_2 ga_2 mu_2 al_2 be_2 ] ... - [ e_i mu_i ga_i al_i be_i ] - [ e_{n-3} mu_{n-3} ga_{n-3} al_{n-3} be_{n-3} ] ... - [ e_{n-2} mu_{n-2} ga_{n-2} al_{n-2} * ] - [ e_{n-1} mu_{n-1} ga_{n-1} * * ]] + [ e_i ga_i mu_i al_i be_i ] + [ e_{n-3} ga_{n-3} mu_{n-3} al_{n-3} be_{n-3} ] ... + [ e_{n-2} ga_{n-2} mu_{n-2} al_{n-2} * ] + [ e_{n-1} ga_{n-1} mu_{n-1} * * ]] ``` where the entries marked with ``*`` are not used by design, but overwritten with @@ -304,8 +304,8 @@ cdef int _c_core_factorize_algo_1( mat_factorized[0] = 0.0 - mat_factorized[1] = mu_i - mat_factorized[2] = 0.0 + mat_factorized[1] = 0.0 + mat_factorized[2] = mu_i mat_factorized[3] = al_i_minus_1 mat_factorized[4] = be_i_minus_1 @@ -319,8 +319,8 @@ cdef int _c_core_factorize_algo_1( be_i = mat_flat[1] / mu_i mat_factorized[5] = 0.0 - mat_factorized[6] = mu_i - mat_factorized[7] = ga_i + mat_factorized[6] = ga_i + mat_factorized[7] = mu_i mat_factorized[8] = al_i mat_factorized[9] = be_i @@ -342,8 +342,8 @@ cdef int _c_core_factorize_algo_1( be_i = be_i_plus_1 mat_factorized[fact_curr_base_idx] = e_i - mat_factorized[fact_curr_base_idx + 1] = mu_i - mat_factorized[fact_curr_base_idx + 2] = ga_i + mat_factorized[fact_curr_base_idx + 1] = ga_i + mat_factorized[fact_curr_base_idx + 2] = mu_i mat_factorized[fact_curr_base_idx + 3] = al_i mat_factorized[fact_curr_base_idx + 4] = be_i @@ -359,8 +359,8 @@ cdef int _c_core_factorize_algo_1( al_i_plus_1 = 
(mat_flat[mat_row_base_idx_1 + mat_n_cols - 2] - be_i * ga_i) / mu_i mat_factorized[fact_curr_base_idx] = e_i - mat_factorized[fact_curr_base_idx + 1] = mu_i - mat_factorized[fact_curr_base_idx + 2] = ga_i + mat_factorized[fact_curr_base_idx + 1] = ga_i + mat_factorized[fact_curr_base_idx + 2] = mu_i mat_factorized[fact_curr_base_idx + 3] = al_i_plus_1 mat_factorized[fact_curr_base_idx + 4] = 0.0 @@ -372,8 +372,8 @@ cdef int _c_core_factorize_algo_1( return mat_n_cols mat_factorized[fact_curr_base_idx + 5] = e_i - mat_factorized[fact_curr_base_idx + 6] = mu_i - mat_factorized[fact_curr_base_idx + 7] = ga_i + mat_factorized[fact_curr_base_idx + 6] = ga_i + mat_factorized[fact_curr_base_idx + 7] = mu_i mat_factorized[fact_curr_base_idx + 8] = 0.0 mat_factorized[fact_curr_base_idx + 9] = 0.0 @@ -405,11 +405,11 @@ cdef int _c_core_factorize_solve_algo_1( # first, the right-hand side is transformed into the vector ``zeta`` # First row - ze_i_minus_1 = rhs_single[0] / mat_factorized[1] + ze_i_minus_1 = rhs_single[0] / mat_factorized[2] result_view[0] = ze_i_minus_1 # Second row - ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[7]) / mat_factorized[6] + ze_i = (rhs_single[rhs_n_cols] - ze_i_minus_1 * mat_factorized[6]) / mat_factorized[7] result_view[rhs_n_cols] = ze_i # Central rows @@ -420,8 +420,8 @@ cdef int _c_core_factorize_solve_algo_1( ze_i_plus_1 = ( rhs_single[res_curr_base_idx] - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] - - ze_i * mat_factorized[fact_curr_base_idx + 2] - ) / mat_factorized[fact_curr_base_idx + 1] + - ze_i * mat_factorized[fact_curr_base_idx + 1] + ) / mat_factorized[fact_curr_base_idx + 2] ze_i_minus_1 = ze_i ze_i = ze_i_plus_1 result_view[res_curr_base_idx] = ze_i_plus_1 @@ -433,8 +433,8 @@ cdef int _c_core_factorize_solve_algo_1( ze_i_plus_1 = ( rhs_single[res_curr_base_idx] - ze_i_minus_1 * mat_factorized[fact_curr_base_idx] - - ze_i * mat_factorized[fact_curr_base_idx + 2] - ) / mat_factorized[fact_curr_base_idx + 1] 
+ - ze_i * mat_factorized[fact_curr_base_idx + 1] + ) / mat_factorized[fact_curr_base_idx + 2] ze_i_minus_1 = ze_i ze_i = ze_i_plus_1 result_view[res_curr_base_idx] = ze_i_plus_1 @@ -443,8 +443,8 @@ cdef int _c_core_factorize_solve_algo_1( ze_i_plus_1 = ( rhs_single[res_curr_base_idx + rhs_n_cols] - ze_i_minus_1 * mat_factorized[fact_curr_base_idx + 5] - - ze_i * mat_factorized[fact_curr_base_idx + 7] - ) / mat_factorized[fact_curr_base_idx + 6] + - ze_i * mat_factorized[fact_curr_base_idx + 6] + ) / mat_factorized[fact_curr_base_idx + 7] result_view[res_curr_base_idx + rhs_n_cols] = ze_i_plus_1 # --- Backward substitution --- From 0945501405321a73f003f7406aee44d4afb328d9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 21 Jul 2024 23:27:40 +0200 Subject: [PATCH 58/62] fix: - avoided performance pitfall of `except * nogil` in Cython implementation of the solvers - replaced this statement by `noexcept nogil` --- src/pentapy/solver.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 58cd00f..37328bc 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -386,7 +386,7 @@ cdef int _c_core_factorize_solve_algo_1( double* rhs_single, int64_t rhs_n_cols, double* result_view, -) except * nogil: +) noexcept nogil: """ Solves the pentadiagonal system of equations ``Ax = b`` with the factorized unit upper triangular matrix ``U`` and the right-hand side ``b``. 
@@ -661,7 +661,7 @@ cdef int _c_core_factorize_solve_algo_2( double* rhs_single, int64_t rhs_n_cols, double* result_view, -) except * nogil: +) noexcept nogil: """ Solves the pentadiagonal system of equations ``Ax = b`` with the factorized From d1433f949775a57ef3cf4a426f32dd2cd99d76a0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 23 Jul 2024 19:35:51 +0200 Subject: [PATCH 59/62] refactor: - moved `pytest.ini` to `pyproject.toml` - hard-coded dependencies in `pyproject.toml` again and removed the dynamic link --- pyproject.toml | 38 ++++++++++++++++++++++++++++++++++---- pytest.ini | 2 -- requirements/all.txt | 2 -- requirements/base.txt | 2 -- requirements/check.txt | 4 ---- requirements/doc.txt | 8 -------- requirements/scipy.txt | 1 - requirements/test.txt | 4 ---- requirements/umfpack | 1 - 9 files changed, 34 insertions(+), 28 deletions(-) delete mode 100644 pytest.ini delete mode 100644 requirements/all.txt delete mode 100644 requirements/base.txt delete mode 100644 requirements/check.txt delete mode 100644 requirements/doc.txt delete mode 100644 requirements/scipy.txt delete mode 100644 requirements/test.txt delete mode 100644 requirements/umfpack diff --git a/pyproject.toml b/pyproject.toml index b26ae54..b12bf0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ name = "pentapy" authors = [{name = "Sebastian Müller", email = "info@geostat-framework.org"}] readme = "README.md" license = {text = "MIT"} -dynamic = ["version", "dependencies", "optional-dependencies"] +dynamic = ["version"] description = "pentapy: A toolbox for pentadiagonal matrizes." 
classifiers = [ "Development Status :: 5 - Production/Stable", @@ -35,10 +35,37 @@ classifiers = [ "Topic :: Scientific/Engineering", "Topic :: Utilities", ] +dependencies = [ + "numpy>=1.20.0", + "psutil>=5.8.0", +] -[tool.setuptools.dynamic] -dependencies = {file = ["requirements/base.txt"]} -optional-dependencies = {scipy = {file = ["requirements/scipy.txt"]}, umfpack = {file = ["requirements/umfpack.txt"]}, all = {file = ["requirements/all.txt"]}, doc = {file = ["requirements/doc.txt"]}, test = {file = ["requirements/test.txt"]}, check = {file = ["requirements/check.txt"]}} +[project.optional-dependencies] +scipy = ["scipy"] +umfpack = ["scikit-umfpack"] +all = ["scipy", "scikit-umfpack"] +doc = [ + "m2r2>=0.2.8", + "scipy>=1.1.0", + "matplotlib>=3", + "perfplot<0.9", + "numpydoc>=1.1", + "sphinx>=7", + "sphinx-gallery>=0.8", + "sphinx-rtd-theme>=2", +] +test = [ + "pytest>=8", + "pytest-cov>=3", + "pytest-xdist>=3", + "scipy>=1.1.0", +] +check = [ + "black>=24,<25", + "isort[colors]", + "pylint", + "cython-lint", +] [project.urls] Homepage = "https://github.com/GeoStat-Framework/pentapy" @@ -116,6 +143,9 @@ max-line-length = 120 max-attributes = 25 max-public-methods = 75 +[tool.pytest.ini_options] +addopts = "--doctest-modules" + [tool.cibuildwheel] # Switch to using build build-frontend = "build" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 2bed0f3..0000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = --doctest-modules \ No newline at end of file diff --git a/requirements/all.txt b/requirements/all.txt deleted file mode 100644 index be8d325..0000000 --- a/requirements/all.txt +++ /dev/null @@ -1,2 +0,0 @@ -scikit-umfpack -scipy \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt deleted file mode 100644 index 0e77631..0000000 --- a/requirements/base.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy>=1.20.0 -psutil>=5.8.0 \ No newline at end of file diff --git a/requirements/check.txt 
b/requirements/check.txt deleted file mode 100644 index 4af46fc..0000000 --- a/requirements/check.txt +++ /dev/null @@ -1,4 +0,0 @@ -black>=24,<25 -isort[colors] -pylint -cython-lint \ No newline at end of file diff --git a/requirements/doc.txt b/requirements/doc.txt deleted file mode 100644 index c49be85..0000000 --- a/requirements/doc.txt +++ /dev/null @@ -1,8 +0,0 @@ -m2r2>=0.2.8 -scipy>=1.1.0 -matplotlib>=3 -perfplot<0.9 -numpydoc>=1.1 -sphinx>=7 -sphinx-gallery>=0.8 -sphinx-rtd-theme>=2 \ No newline at end of file diff --git a/requirements/scipy.txt b/requirements/scipy.txt deleted file mode 100644 index 9c61c73..0000000 --- a/requirements/scipy.txt +++ /dev/null @@ -1 +0,0 @@ -scipy \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt deleted file mode 100644 index 2f8c0c7..0000000 --- a/requirements/test.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytest>=8 -pytest-cov>=3 -pytest-xdist>=3 -scipy>=1.1.0 \ No newline at end of file diff --git a/requirements/umfpack b/requirements/umfpack deleted file mode 100644 index a8630c1..0000000 --- a/requirements/umfpack +++ /dev/null @@ -1 +0,0 @@ -scikit-umfpack \ No newline at end of file From edb4f337defb7437ace62d8c8d15c699a061d758 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 23 Jul 2024 21:13:16 +0200 Subject: [PATCH 60/62] refactor: - made OpenMP-link in `setup.py` optional based on an environment variable - changed thread number evaluation to be OpenMP-based (if at all) - dropped `psutil` dependency - renamed `workers` to `num_threads` and made `None` a possible option and the default - adapted tests to the new `num_threads` - updated documentation - adapted to `pylint` comments --- CHANGELOG.md | 51 ++++----- README.md | 1 - docs/source/index.rst | 1 - pyproject.toml | 2 +- setup.py | 44 ++++++-- src/pentapy/core.py | 141 +++++++++--------------- src/pentapy/errors.py | 4 - src/pentapy/solver.pxd | 4 +- src/pentapy/solver.pyx | 55 ++++++--- tests/templates.py | 12 +- 
tests/test_solvers_internal_parallel.py | 37 +++---- tests/test_solvers_internal_serial.py | 8 +- 12 files changed, 177 insertions(+), 183 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5891ee6..a4be735 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,141 +8,138 @@ See [#27](https://github.com/GeoStat-Framework/pentapy/pull/27) ### Breaking Changes -- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support that can now be enabled via the new ``workers`` parameter in ``pentapy.solve`` (default: 1) -- fully typed the ``pentapy.tools`` module +- fully parallelized the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support that can now be enabled via the new `num_threads` parameter in `pentapy.solve` (default: `None`) - updated the **Cython low level interfaces** to PTRANS-I and PTRANS-II to **only accept C-contiguous arrays** (not backwards compatible) -## [1.4.0] - 2024-06 - -See [#26](https://github.com/GeoStat-Framework/pentapy/pull/26) - ### Enhancements -- added support for multiple right-hand sides (currently serial) - improved error handling and added debug information to error messages ### Changes -- shotgun refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support -- fully typed the function ``pentapy.solve`` -- made internal solver alias handling of ``pentapy.solve`` smarter, more robust, and removed all duplicate code +- refactored and documented the Cython implementation of PTRANS-I and PTRANS-II for single and multiple right-hand sides support +- fully typed the `pentapy.tools` module and the function `pentapy.solve` +- made internal solver alias handling of `pentapy.solve` smarter, more robust, and removed all duplicate code - gave all solvers a consistent interface -- made code in ``pentapy.core`` more human-readable and maintainable and added comments +- made code in
`pentapy.core` more maintainable - fixed typos in documentation ### Bugfixes - fixed error handling in case of zero-division to trigger dead error handling branch (see [Issue 23](https://github.com/GeoStat-Framework/pentapy/issues/23)) -- fixed edge case error for row/column of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24)) +- fixed edge case error for row/column count of 3 (see [Issue 24](https://github.com/GeoStat-Framework/pentapy/issues/24)) ### Tests -- transitioned from ``unittest``-based testing to fully ``pytest``-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) +- transitioned from `unittest`-based testing to fully `pytest`-based testing with parametrized and parallelized exhaustive testing (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) - made actual tests more meaningful by comparing them to LAPACK as reference standard (see [Issue 25](https://github.com/GeoStat-Framework/pentapy/issues/25)) -- included external solver bindings accessible via ``pentapy.solve`` as part of the test suite +- included external solver bindings accessible via `pentapy.solve` as part of the test suite - increased true coverage (not line-hit coverage) close to 100% -### Packaging - -- made dependency specification file-based and dynamic - ## [1.3.0] - 2024-04 See [#21](https://github.com/GeoStat-Framework/pentapy/pull/21) ### Enhancements + - added support for python 3.12 - added support for numpy 2 - build extensions with numpy 2 and cython 3 ### Changes + - dropped python 3.7 support - dropped 32bit builds - linted cython files - increase maximal line length to 88 (black default) - ## [1.2.0] - 2023-04 See [#19](https://github.com/GeoStat-Framework/pentapy/pull/19) ### Enhancements + - added support for python 3.10 and 3.11 - add wheels for arm64 systems - created `solver.pxd` file to be able to cimport the solver module - added a
`CITATION.bib` file ### Changes + - move to `src/` based package structure - dropped python 3.6 support - move meta-data to pyproject.toml - simplified documentation ### Bugfixes + - determine correct version when installing from archive ## [1.1.2] - 2021-07 ### Changes + - new package structure with `pyproject.toml` ([#15](https://github.com/GeoStat-Framework/pentapy/pull/15)) - Sphinx-Gallery for Examples - Repository restructuring: use a single `main` branch - use `np.asarray` in `solve` to speed up computation ([#17](https://github.com/GeoStat-Framework/pentapy/pull/17)) - ## [1.1.1] - 2021-02 ### Enhancements + - Python 3.9 support ### Changes -- GitHub Actions for CI +- GitHub Actions for CI ## [1.1.0] - 2020-03-22 ### Enhancements + - Python 3.8 support ### Changes + - python only builds are no longer available - Python 2.7 and 3.4 support dropped - ## [1.0.3] - 2019-11-10 ### Enhancements + - the algorithms `PTRANS-I` and `PTRANS-II` now raise a warning when they can not solve the given system - there are now switches to install scipy and umf solvers as extra requirements ### Bugfixes -- multiple minor bugfixes +- multiple minor bugfixes ## [1.0.0] - 2019-09-18 ### Enhancements -- the second algorithm `PTRANS-II` from *Askar et al. 2015* is now implemented and can be used by `solver=2` + +- the second algorithm `PTRANS-II` from _Askar et al. 
2015_ is now implemented and can be used by `solver=2` - the package is now tested and a coverage is calculated - there are now pre-built binaries for Python 3.7 - the documentation is now available under https://geostat-framework.readthedocs.io/projects/pentapy ### Changes -- pentapy is now licensed under the MIT license +- pentapy is now licensed under the MIT license ## [0.1.1] - 2019-03-08 ### Bugfixes -- MANIFEST.in was missing in the 0.1.0 version +- MANIFEST.in was missing in the 0.1.0 version ## [0.1.0] - 2019-03-07 This is the first release of pentapy, a python toolbox for solving pentadiagonal linear equation systems. The solver is implemented in cython, which makes it really fast. - [2.0.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.4.0...v2.0.0 [1.4.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.3.0...v1.4.0 [1.3.0]: https://github.com/GeoStat-Framework/pentapy/compare/v1.2.0...v1.3.0 diff --git a/README.md b/README.md index 33e7237..068ab5b 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,6 @@ Have a look at the script: [``examples/03_perform_simple.py``](https://github.co ## Requirements: - [NumPy >= 1.20.0](https://www.numpy.org) -- [psutil >= 5.8.0](https://psutil.readthedocs.io/en/latest/) (for parallelisation) ### Optional diff --git a/docs/source/index.rst b/docs/source/index.rst index 83ae30f..93d9a5d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -95,7 +95,6 @@ Requirements ============ - `Numpy >= 1.20.0 `_ -- `psutil >= 5.8.0 `_ Optional -------- diff --git a/pyproject.toml b/pyproject.toml index b12bf0f..57bed29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ requires = [ "setuptools_scm>=7", "numpy>=2.0.0rc1,<2.3; python_version >= '3.9'", "oldest-supported-numpy; python_version < '3.9'", + "extension-helpers>=1", "Cython>=3.0.10,<3.1.0", ] build-backend = "setuptools.build_meta" @@ -37,7 +38,6 @@ classifiers = [ ] dependencies = [ "numpy>=1.20.0", - "psutil>=5.8.0", ] 
[project.optional-dependencies] diff --git a/setup.py b/setup.py index 8d7421d..0c23078 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,24 @@ """pentapy: A toolbox for pentadiagonal matrices.""" +# === Imports === + import os -import sys -import Cython.Compiler.Options import numpy as np from Cython.Build import cythonize +from extension_helpers import add_openmp_flags_if_available from setuptools import Extension, setup -if sys.platform.startswith("win"): - openmp_arg = "/openmp" -else: - openmp_arg = "-fopenmp" +# === Constants === -Cython.Compiler.Options.annotate = True +# the environment variable key for the build of the serial/parallel version +PENTAPY_BUILD_PARALLEL = "PENTAPY_BUILD_PARALLEL" +# the compiler flags for the OpenMP parallelization +OPENMP = "OPENMP" +# the number of threads for the Cython build +CYTHON_BUILD_NUM_THREADS = 1 + +# === Setup === # cython extensions CY_MODULES = [ @@ -22,13 +27,32 @@ sources=[os.path.join("src", "pentapy", "solver.pyx")], include_dirs=[np.get_include()], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], - extra_compile_args=[openmp_arg], - extra_link_args=[openmp_arg], ) ] +# the OpenMP link is added if available/requested +# the environment variables can be PENTAPY_BUILD_PARALLEL = 0 (builds serial version) or +# PENTAPY_BUILD_PARALLEL != 0 (builds parallel version) +with_open_mp = False +if int(os.environ.get(PENTAPY_BUILD_PARALLEL, "0")): + openmp_added = [add_openmp_flags_if_available(mod) for mod in CY_MODULES] + with_open_mp = any(openmp_added) + if with_open_mp: + open_mp_str = "linking OpenMP (parallel version)" + else: + open_mp_str = "not linking OpenMP (serial version)" + + print(f"PENTAPY SETUP - {open_mp_str}") + +else: + print("PENTAPY SETUP - OpenMP not requested (serial version)") + setup( - ext_modules=cythonize(CY_MODULES, nthreads=1, annotate=True), + ext_modules=cythonize( + CY_MODULES, + nthreads=CYTHON_BUILD_NUM_THREADS, + compile_time_env={OPENMP: with_open_mp}, + ), 
package_data={"pentapy": ["*.pxd"]}, # include pxd files include_package_data=False, # ignore other files zip_safe=False, diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 8607972..4126a9f 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -5,56 +5,40 @@ # === Imports === import warnings -from typing import Literal +from typing import Literal, Optional import numpy as np -import psutil from pentapy import _models as pmodels from pentapy import errors as perrors from pentapy import solver as psolver # type: ignore from pentapy import tools as ptools -# === Auxiliary functions === - - -def _get_num_workers(workers: int) -> int: - """ - Gets the number of available workers for the solver. +# === Types === + +SolverAliases = Literal[ + 1, + "1", + "PTRANS-I", + "ptrans-i", + 2, + "2", + "PTRANS-II", + "ptrans-ii", + 3, + "3", + "lapack", + 4, + "4", + "spsolve", + 5, + "5", + "spsolve_umf", + "umf", + "umf_pack", +] - Parameters - ---------- - workers : :class:`int` - Number of workers requested. - - Returns - ------- - workers : :class:`int` - Number of workers available. 
- - """ - - if workers < -1: - raise ValueError( - perrors.PentaPyErrorMessages.WRONG_WORKERS.format(workers=workers) - ) - - if workers == -1: - # NOTE: the following will be overwritten by the number of available threads - workers = 999_999_999_999_999_999_999_999_999 - - # the number of workers is limited between 1 and the number of available threads - # NOTE: the following does not count the number of total threads, but the number of - # threads available for the solver - proc = psutil.Process() - workers = min( - workers, - len(proc.cpu_affinity()), # type: ignore - ) - workers = max(workers, 1) - del proc - - return workers +# === Auxiliary functions === def _raise_ptrans_or_numpy_shape_mismatch_error( @@ -134,12 +118,12 @@ def _solve_with_numpy( return np.full(shape=rhs.shape, fill_value=np.nan) -def _solve_with_ptrans( +def _solve_with_ptrans( # pylint: disable=R1710 mat: np.ndarray, rhs: np.ndarray, is_flat: bool, index_row_wise: bool, - workers: int, + num_threads: Optional[int], solver_inter: pmodels.PentaSolverAliases, ) -> np.ndarray: # type: ignore """ @@ -150,10 +134,10 @@ def _solve_with_ptrans( # the matrix is checked and shifted if necessary ... 
if is_flat and index_row_wise: mat_flat = np.asarray(mat, dtype=np.double) - ptools._check_penta(mat_flat) + ptools._check_penta(mat_flat) # pylint: disable=W0212 elif is_flat: mat_flat = np.array(mat, dtype=np.double) # NOTE: this is a copy - ptools._check_penta(mat_flat) + ptools._check_penta(mat_flat) # pylint: disable=W0212 ptools.shift_banded(mat_flat, copy=False) else: mat_flat = ptools.create_banded(mat, col_wise=False, dtype=np.double) @@ -167,9 +151,6 @@ def _solve_with_ptrans( if mat_flat.shape[1] == 3: return _solve_with_numpy(mat_flat=mat_flat, rhs=rhs) - # now, the number of workers for multithreading has to be determined if necessary - workers = _get_num_workers(workers) - # if there is only a single right-hand side, it has to be reshaped to a 2D array # NOTE: this has to be reverted at the end single_rhs = rhs.ndim == 1 @@ -179,16 +160,16 @@ def _solve_with_ptrans( # the respective solver is chosen ... solver_func = ( - psolver.penta_solver1 + psolver.penta_solver1 # pylint: disable=I1101 if solver_inter == pmodels.PentaSolverAliases.PTRANS_I - else psolver.penta_solver2 + else psolver.penta_solver2 # pylint: disable=I1101 ) # ... 
and the solver is called sol, info = solver_func( np.ascontiguousarray(mat_flat), np.ascontiguousarray(rhs), - workers, + num_threads, ) # in case of success, the solution can be returned (reshaped if necessary) @@ -199,7 +180,7 @@ def _solve_with_ptrans( return sol # in case of a singular matrix, a warning will be issued and NaNs will be returned - elif info > pmodels.Infos.SUCCESS: + if info > pmodels.Infos.SUCCESS: warnings.warn( perrors.PentaPyErrorMessages.SINGULAR_MATRIX.format( solver_inter_name=pmodels.PentaSolverAliases.PTRANS_I.name, @@ -225,28 +206,8 @@ def solve( rhs: np.ndarray, is_flat: bool = False, index_row_wise: bool = True, - solver: Literal[ - 1, - "1", - "PTRANS-I", - "ptrans-i", - 2, - "2", - "PTRANS-II", - "ptrans-ii", - 3, - "3", - "lapack", - 4, - "4", - "spsolve", - 5, - "5", - "spsolve_umf", - "umf", - "umf_pack", - ] = 1, - workers: int = 1, + solver: SolverAliases = 1, + num_threads: Optional[int] = None, ) -> np.ndarray: """ Solver for a pentadiagonal system. @@ -296,11 +257,11 @@ def solve( * ``[5, "5", "spsolve_umf", "umf", "umf_pack"]`` : :func:`scipy.sparse.linalg.spsolve(..., use_umfpack=False)` Strings are not case-sensitive. - workers : :class:`int`, optional - Number of workers used in the PTRANS-I and PTRANS-II solvers for parallel + num_threads : :class:`int` or ``None``, optional + Number of num_threads used in the PTRANS-I and PTRANS-II solvers for parallel processing of multiple right-hand sides. Parallelisation overhead can be - significant for small systems. If set to ``-1``, the number of workers is - automatically determined. Default: ``1`` + significant for small systems. If set to a negative value or ``None``, the + number of threads is automatically determined. Default: ``None`` Returns ------- @@ -309,16 +270,16 @@ def solve( Raises ------ - ValueError - If the number of workers is incorrect. 
ValueError If there is a shape mismatch between the number of equations in the left-hand side matrix and the number of right-hand sides. - """ + """ # pylint: disable=C0301 # first, the solver is converted to the internal name to avoid confusion - solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[str(solver).lower()] + solver_inter = pmodels._SOLVER_ALIAS_CONVERSIONS[ # pylint: disable=W0212 + str(solver).lower() + ] # Case 1: the pentapy solvers if solver_inter in { @@ -331,12 +292,12 @@ def solve( rhs=rhs, is_flat=is_flat, index_row_wise=index_row_wise, - workers=workers, + num_threads=num_threads, solver_inter=solver_inter, ) # Case 2: LAPACK's banded solver - elif solver_inter == pmodels.PentaSolverAliases.LAPACK: + if solver_inter == pmodels.PentaSolverAliases.LAPACK: try: from scipy.linalg import solve_banded except ImportError as imp_err: # pragma: no cover @@ -345,7 +306,7 @@ def solve( if is_flat and index_row_wise: mat_flat = np.array(mat) # NOTE: this is a copy - ptools._check_penta(mat_flat) + ptools._check_penta(mat_flat) # pylint: disable=W0212 ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) @@ -367,7 +328,7 @@ def solve( return np.full(shape=rhs.shape, fill_value=np.nan) # Case 3: SciPy's sparse solver with or without UMFPACK - elif solver_inter in { + if solver_inter in { pmodels.PentaSolverAliases.SUPER_LU, pmodels.PentaSolverAliases.UMFPACK, }: @@ -380,7 +341,7 @@ def solve( if is_flat and index_row_wise: mat_flat = np.array(mat) # NOTE: this is a copy - ptools._check_penta(mat_flat) + ptools._check_penta(mat_flat) # pylint: disable=W0212 ptools.shift_banded(mat_flat, col_to_row=False, copy=False) elif is_flat: mat_flat = np.asarray(mat) @@ -412,6 +373,6 @@ def solve( return sol - else: # pragma: no cover - msg = f"pentapy.solve: unknown solver ({solver})" - raise ValueError(msg) + # Case 4: unknown solver + msg = f"pentapy.solve: unknown solver ({solver})" # pragma: no cover + raise 
ValueError(msg) # pragma: no cover diff --git a/src/pentapy/errors.py b/src/pentapy/errors.py index 0974816..8479cd3 100644 --- a/src/pentapy/errors.py +++ b/src/pentapy/errors.py @@ -12,7 +12,6 @@ class PentaPyErrorMessages(str, Enum): """ Defines the possible error messages for the pentapy package, namely - - ``WRONG_WORKERS``: the number of workers is incorrect - ``SINGULAR_MATRIX``: the matrix is singular - ``SHAPE_MISMATCH``: the shape of the input arrays is incorrect - ``WRONG_SOLVER``: the solver alias is incorrect on C-level (internal error, @@ -21,9 +20,6 @@ class PentaPyErrorMessages(str, Enum): """ - WRONG_WORKERS = ( - "pentapy.solve: workers has to be -1 or greater, but got workers={workers}" - ) SINGULAR_MATRIX = ( "pentapy: {solver_inter_name} solver encountered singular matrix at " "row index {row_idx}. Returning NaNs." diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index 879dcf6..901f1a2 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -2,13 +2,13 @@ cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + int num_threads, int* info, ) cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + int num_threads, int* info, ) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index 37328bc..b2fc737 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -11,11 +11,38 @@ implemented in Cython. 
import numpy as np cimport numpy as np - from cython cimport view + from cython.parallel import prange + from libc.stdint cimport int64_t +# NOTE: OPENMP is set during setup +if OPENMP: + cimport openmp + +# === Optional Setup of OpenMP === + + +def get_c_num_threads(num_threads): + # if the thread number was specified explicitly, it needs to be sanitized + if num_threads is not None: + if num_threads >= 0: + return max(1, num_threads) + + elif OPENMP: # negative numbers result in maximum thread number with OPENMP + return openmp.omp_get_max_threads() + + # without OpenMP, the number of threads is set to 1 in the final return + + # if None, the maximum thread number is retrieved with OPENMP if available + # NOTE: OPENMP is set during setup + if OPENMP: + return openmp.omp_get_num_procs() + + # if no threads were set so far, the number of threads is set to 1 (serial mode) + return 1 + # === Constants === @@ -36,18 +63,19 @@ cdef enum Infos: def penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + num_threads=None, ): # NOTE: info is defined to be overwritten for possible future validations cdef int info + num_threads_c = get_c_num_threads(num_threads) return ( np.asarray( c_penta_solver1( mat_flat, rhs, - workers, + num_threads_c, &info, ) ), @@ -58,18 +86,19 @@ def penta_solver1( def penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + num_threads=None, ): # NOTE: info is defined to be overwritten for possible future validations cdef int info + num_threads_c = get_c_num_threads(num_threads) return ( np.asarray( c_penta_solver2( mat_flat, rhs, - workers, + num_threads_c, &info, ) ), @@ -82,7 +111,7 @@ def penta_solver2( cdef double[::, ::1] c_penta_solver1( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + int num_threads, int* info, ): """ @@ -121,13 +150,12 @@ cdef double[::, ::1] c_penta_solver1( return _c_interf_factorize_solve( mat_factorized, rhs, - workers, + num_threads, info,
Solvers.PTRRANS_1, ) - cdef double[::, ::1] _c_interf_factorize( double[::, ::1] mat_flat, int* info, @@ -179,7 +207,7 @@ cdef double[::, ::1] _c_interf_factorize( cdef double[::, ::1] _c_interf_factorize_solve( double[::, ::1] mat_factorized, double[::, ::1] rhs, - int workers, + int num_threads, int* info, int solver, ): @@ -210,7 +238,7 @@ cdef double[::, ::1] _c_interf_factorize_solve( for iter_col in prange( rhs_n_cols, nogil=True, - num_threads=workers, + num_threads=num_threads, ): info[0] = _c_core_factorize_solve_algo_1( mat_n_cols, @@ -227,7 +255,7 @@ cdef double[::, ::1] _c_interf_factorize_solve( for iter_col in prange( rhs_n_cols, nogil=True, - num_threads=workers, + num_threads=num_threads, ): info[0] = _c_core_factorize_solve_algo_2( mat_n_cols, @@ -302,7 +330,6 @@ cdef int _c_core_factorize_algo_1( al_i_minus_1 = mat_flat[mat_row_base_idx_1] / mu_i be_i_minus_1 = mat_flat[0] / mu_i - mat_factorized[0] = 0.0 mat_factorized[1] = 0.0 mat_factorized[2] = mu_i @@ -473,7 +500,7 @@ cdef int _c_core_factorize_solve_algo_1( cdef double[::, ::1] c_penta_solver2( double[::, ::1] mat_flat, double[::, ::1] rhs, - int workers, + int num_threads, int* info, ): """ @@ -512,7 +539,7 @@ cdef double[::, ::1] c_penta_solver2( return _c_interf_factorize_solve( mat_factorized, rhs, - workers, + num_threads, info, Solvers.PTRRANS_2, ) diff --git a/tests/templates.py b/tests/templates.py index 6d195a1..554b44f 100644 --- a/tests/templates.py +++ b/tests/templates.py @@ -40,7 +40,7 @@ "solver_alias": SOLVER_ALIASES_PTRANS_I + SOLVER_ALIASES_PTRANS_II, "induce_error": [False, True], "from_order": ["C", "F"], - "workers": [1], + "num_threads": [1], } # === Auxiliary functions === @@ -121,7 +121,7 @@ def pentapy_solvers_extended_template( ], induce_error: bool, from_order: Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: """ Tests the pentadiagonal solvers when starting from different input layouts, number @@ -178,7 +178,7 @@ def 
pentapy_solvers_extended_template( mat=mat, rhs=rhs, solver=solver_alias, # type: ignore - workers=workers, + num_threads=num_threads, **kwargs, ) @@ -194,7 +194,7 @@ def pentapy_solvers_extended_template( mat=mat, rhs=rhs, solver=solver_alias, # type: ignore - workers=workers, + num_threads=num_threads, **kwargs, ) assert sol.shape == result_shape @@ -227,7 +227,7 @@ def pentapy_solvers_shape_mismatch_template( "pTrAnS-Ii", ], from_order: Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: """ Tests the pentadiagonal solvers when the shape of the right-hand side is incorrect, @@ -264,7 +264,7 @@ def pentapy_solvers_shape_mismatch_template( mat=mat, rhs=rhs, solver=solver_alias, # type: ignore - workers=workers, + num_threads=num_threads, **kwargs, ) diff --git a/tests/test_solvers_internal_parallel.py b/tests/test_solvers_internal_parallel.py index 3775ffa..b6a3b4e 100644 --- a/tests/test_solvers_internal_parallel.py +++ b/tests/test_solvers_internal_parallel.py @@ -9,7 +9,7 @@ # === Imports === from copy import deepcopy -from typing import Literal, Optional, Type +from typing import Literal, Optional import pytest import templates @@ -20,7 +20,7 @@ # based on either Algorithm PTRANS-I or PTRANS-II in parallel mode param_dict = deepcopy(templates.PARAM_DICT) param_dict["from_order"] = ["C"] -param_dict["workers"] = [-1] +param_dict["num_threads"] = [-1] # --- Extended solve test --- @@ -41,7 +41,7 @@ def test_pentapy_solvers_parallel( ], induce_error: bool, from_order: Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: templates.pentapy_solvers_extended_template( @@ -51,7 +51,7 @@ def test_pentapy_solvers_parallel( solver_alias=solver_alias, induce_error=induce_error, from_order=from_order, - workers=workers, + num_threads=num_threads, ) @@ -61,17 +61,15 @@ def test_pentapy_solvers_parallel( ) -# --- Different workers test --- +# --- Different number of threads test --- -@pytest.mark.parametrize( - "workers, expected", [(0, None), (1, 
None), (-1, None), (-2, ValueError)] -) -def test_pentapy_solvers_parallel_different_workers( - workers: int, expected: Optional[Type[Exception]] +@pytest.mark.parametrize("num_threads", [0, 1, -1, -2, None]) +def test_pentapy_solvers_parallel_different_num_threads( + num_threads: Optional[int], ) -> None: """ - Tests the parallel solver with different number of workers, which might be wrong. + Tests that the parallel solvers run properly with different numbers of threads. """ @@ -82,17 +80,10 @@ def test_pentapy_solvers_parallel_different_workers( solver_alias=1, induce_error=False, from_order="C", - workers=workers, + num_threads=num_threads, ) - # Case 1: the test should fail - if expected is not None: - with pytest.raises(expected): - templates.pentapy_solvers_extended_template(**kwargs) # type: ignore - - return - - # Case 2: the test should pass + # NOTE: if there is no crash, the test is successful templates.pentapy_solvers_extended_template(**kwargs) # type: ignore @@ -114,7 +105,7 @@ def test_pentapy_solvers_shape_mismatch_parallel( "pTrAnS-Ii", ], from_order: Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: templates.pentapy_solvers_shape_mismatch_template( @@ -123,12 +114,12 @@ def test_pentapy_solvers_shape_mismatch_parallel( input_layout=input_layout, solver_alias=solver_alias, from_order=from_order, - workers=workers, + num_threads=num_threads, ) params_dict_without_induce_error = deepcopy(templates.PARAM_DICT) -params_dict_without_induce_error["workers"] = [-1] +params_dict_without_induce_error["num_threads"] = [-1] params_dict_without_induce_error.pop("induce_error") for key, value in params_dict_without_induce_error.items(): diff --git a/tests/test_solvers_internal_serial.py b/tests/test_solvers_internal_serial.py index f1248f8..0957a65 100644 --- a/tests/test_solvers_internal_serial.py +++ b/tests/test_solvers_internal_serial.py @@ -39,7 +39,7 @@ def test_pentapy_solvers_extended_serial( ], induce_error: bool, from_order: 
Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: templates.pentapy_solvers_extended_template( @@ -49,7 +49,7 @@ def test_pentapy_solvers_extended_serial( solver_alias=solver_alias, induce_error=induce_error, from_order=from_order, - workers=workers, + num_threads=num_threads, ) @@ -77,7 +77,7 @@ def test_pentapy_solvers_shape_mismatch_serial( "pTrAnS-Ii", ], from_order: Literal["C", "F"], - workers: int, + num_threads: int, ) -> None: templates.pentapy_solvers_shape_mismatch_template( @@ -86,7 +86,7 @@ def test_pentapy_solvers_shape_mismatch_serial( input_layout=input_layout, solver_alias=solver_alias, from_order=from_order, - workers=workers, + num_threads=num_threads, ) From 5b424281e68a2f287f04b64fde48cc50728c53a4 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 23 Jul 2024 21:32:36 +0200 Subject: [PATCH 61/62] doc: - fixed typo in docstring of `solve` for the number of threads --- src/pentapy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pentapy/core.py b/src/pentapy/core.py index 4126a9f..fabb1cc 100644 --- a/src/pentapy/core.py +++ b/src/pentapy/core.py @@ -258,7 +258,7 @@ def solve( Strings are not case-sensitive. num_threads : :class:`int` or ``None``, optional - Number of num_threads used in the PTRANS-I and PTRANS-II solvers for parallel + Number of threads used in the PTRANS-I and PTRANS-II solvers for parallel processing of multiple right-hand sides. Parallelisation overhead can be significant for small systems. If set to a negative value or ``None``, the number of threads is automatically determined. 
Default: ``None`` From 743549bbaaf88d1f36c620e784b671124102efe0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 23 Jul 2024 21:44:44 +0200 Subject: [PATCH 62/62] refactor: - made input matrix and right-hand side read-only on Cython-level --- src/pentapy/solver.pxd | 8 ++++---- src/pentapy/solver.pyx | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/pentapy/solver.pxd b/src/pentapy/solver.pxd index 901f1a2..ec7adc3 100644 --- a/src/pentapy/solver.pxd +++ b/src/pentapy/solver.pxd @@ -1,14 +1,14 @@ # cython: language_level=3 cdef double[::, ::1] c_penta_solver1( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, int num_threads, int* info, ) cdef double[::, ::1] c_penta_solver2( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, int num_threads, int* info, ) diff --git a/src/pentapy/solver.pyx b/src/pentapy/solver.pyx index b2fc737..11db625 100644 --- a/src/pentapy/solver.pyx +++ b/src/pentapy/solver.pyx @@ -61,8 +61,8 @@ cdef enum Infos: def penta_solver1( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, num_threads=None, ): @@ -84,8 +84,8 @@ def penta_solver1( def penta_solver2( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, num_threads=None, ): @@ -109,8 +109,8 @@ def penta_solver2( # === Solver Algorithm 1 === cdef double[::, ::1] c_penta_solver1( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, int num_threads, int* info, ): @@ -157,7 +157,7 @@ cdef double[::, ::1] c_penta_solver1( cdef double[::, ::1] _c_interf_factorize( - double[::, ::1] mat_flat, + const double[::, ::1] mat_flat, int* info, int solver, ): @@ -206,7 +206,7 @@ cdef double[::, ::1] _c_interf_factorize( cdef double[::, ::1] 
_c_interf_factorize_solve( double[::, ::1] mat_factorized, - double[::, ::1] rhs, + const double[::, ::1] rhs, int num_threads, int* info, int solver, @@ -498,8 +498,8 @@ cdef int _c_core_factorize_solve_algo_1( cdef double[::, ::1] c_penta_solver2( - double[::, ::1] mat_flat, - double[::, ::1] rhs, + const double[::, ::1] mat_flat, + const double[::, ::1] rhs, int num_threads, int* info, ):