diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3df093d1..13f219a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,7 +66,7 @@ jobs: python-version: '3.x' - name: "Main Script" run: | - EXTRA_INSTALL="numpy" + EXTRA_INSTALL="numpy siphash24" curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-mypy.sh . ./prepare-and-run-mypy.sh python3 mypy @@ -89,7 +89,7 @@ jobs: # AK, 2020-12-13 rm pytools/mpiwrap.py - EXTRA_INSTALL="numpy frozendict immutabledict orderedsets constantdict immutables pyrsistent attrs" + EXTRA_INSTALL="numpy frozendict immutabledict orderedsets constantdict immutables pyrsistent attrs siphash24" curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh . ./build-and-test-py-project.sh @@ -104,6 +104,7 @@ jobs: python-version: '3.x' - name: "Main Script" run: | + EXTRA_INSTALL="siphash24" rm pytools/{convergence,spatial_btree,obj_array,mpiwrap}.py curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh . ./build-and-test-py-project.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7f23c645..568089af 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,7 +4,7 @@ Pytest: # AK, 2020-12-13 rm pytools/mpiwrap.py - export EXTRA_INSTALL="numpy" + export EXTRA_INSTALL="numpy siphash24" curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh . ./build-and-test-py-project.sh tags: @@ -17,6 +17,7 @@ Pytest: Pytest without Numpy: script: | + EXTRA_INSTALL="siphash24" rm pytools/{convergence,spatial_btree,obj_array,mpiwrap}.py curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-and-test-py-project.sh . ./build-and-test-py-project.sh @@ -40,7 +41,7 @@ Pytest without Numpy: # except: # - tags -Flake8: +Ruff: script: - pipx install ruff - ruff check @@ -60,7 +61,7 @@ Mypy: Pylint: script: - - EXTRA_INSTALL="numpy pymbolic orderedsets" + - EXTRA_INSTALL="numpy pymbolic orderedsets siphash24" - py_version=3 - curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/prepare-and-run-pylint.sh - . ./prepare-and-run-pylint.sh "$CI_PROJECT_NAME" @@ -71,7 +72,7 @@ Pylint: Documentation: script: - - EXTRA_INSTALL="numpy" + - EXTRA_INSTALL="numpy siphash24" - curl -L -O https://gitlab.tiker.net/inducer/ci-support/raw/main/build-docs.sh - ". ./build-docs.sh" tags: diff --git a/README.rst b/README.rst index 1cbdccf0..7f9268d3 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,9 @@ Pytools: Lots of Little Utilities .. image:: https://gitlab.tiker.net/inducer/pytools/badges/main/pipeline.svg :alt: Gitlab Build Status :target: https://gitlab.tiker.net/inducer/pytools/commits/main -.. image:: https://github.com/inducer/pytools/workflows/CI/badge.svg?branch=main&event=push +.. image:: https://github.com/inducer/pytools/workflows/CI/badge.svg?branch=main :alt: Github Build Status - :target: https://github.com/inducer/pytools/actions?query=branch%3Amain+workflow%3ACI+event%3Apush + :target: https://github.com/inducer/pytools/actions?query=branch%3Amain+workflow%3ACI .. image:: https://badge.fury.io/py/pytools.png :alt: Python Package Index Release Page :target: https://pypi.org/project/pytools/ diff --git a/doc/conf.py b/doc/conf.py index 13dcc2c8..62eceea3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -26,13 +26,13 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] intersphinx_mapping = { - "loopy": ("https://documen.tician.de/loopy/", None), + "loopy": ("https://documen.tician.de/loopy", None), "numpy": ("https://numpy.org/doc/stable", None), - "pymbolic": ("https://documen.tician.de/pymbolic/", None), - "pytest": ("https://docs.pytest.org/en/stable/", None), - "setuptools": ("https://setuptools.pypa.io/en/latest/", None), + "pymbolic": ("https://documen.tician.de/pymbolic", None), + "pytest": ("https://docs.pytest.org/en/stable", None), + "setuptools": ("https://setuptools.pypa.io/en/latest", None), "python": ("https://docs.python.org/3", None), - "platformdirs": ("https://platformdirs.readthedocs.io/en/latest/", None), + "platformdirs": ("https://platformdirs.readthedocs.io/en/latest", None), } nitpicky = True diff --git a/doc/upload-docs.sh b/doc/upload-docs.sh index ef523e61..90139e77 100755 --- a/doc/upload-docs.sh +++ b/doc/upload-docs.sh @@ -1,3 +1,3 @@ #! /bin/sh -rsync --verbose --archive --delete _build/html/* doc-upload:doc/pytools +rsync --verbose --archive --delete _build/html/ doc-upload:doc/pytools diff --git a/pyproject.toml b/pyproject.toml index 2fce1070..fde22124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ [project] name = "pytools" -version = "2024.1.10" +version = "2024.1.13" description = "A collection of tools for Python" readme = "README.rst" license = { text = "MIT" } @@ -34,7 +34,6 @@ dependencies = [ "platformdirs>=2.2", # for dataclass_transform with frozen_default "typing-extensions>=4; python_version<'3.13'", - "siphash24>=1.6", ] [project.optional-dependencies] @@ -46,6 +45,9 @@ test = [ "pytest", "ruff", ] +siphash = [ + "siphash24>=1.6", +] [project.urls] Documentation = "https://documen.tician.de/pytools/" @@ -108,5 +110,13 @@ known-local-folder = [ lines-after-imports = 2 [tool.mypy] +python_version = "3.8" ignore_missing_imports = true warn_unused_ignores = true +# TODO: enable this at some point +# check_untyped_defs = true + +[tool.typos.default] +extend-ignore-re = [ + "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$" +] diff --git a/pytools/__init__.py b/pytools/__init__.py index 08c3ed1a..ff45b90e 100644 --- a/pytools/__init__.py +++ b/pytools/__init__.py @@ -32,14 +32,15 @@ from functools import reduce, wraps from sys import intern from typing import ( + TYPE_CHECKING, Any, Callable, ClassVar, + Collection, Dict, Generic, Hashable, Iterable, - Iterator, List, Mapping, Optional, @@ -52,15 +53,21 @@ ) -try: - from typing import Concatenate, SupportsIndex -except ImportError: - from typing_extensions import Concatenate, SupportsIndex +if TYPE_CHECKING: + # NOTE: mypy seems to be confused by the `try.. except` below when called with + # python -m mypy --python-version 3.8 ... + # see https://github.com/python/mypy/issues/14220 + from typing_extensions import Concatenate, ParamSpec, SupportsIndex +else: + try: + from typing import Concatenate, SupportsIndex + except ImportError: + from typing_extensions import Concatenate, SupportsIndex -try: - from typing import ParamSpec -except ImportError: - from typing_extensions import ParamSpec # type: ignore[assignment] + try: + from typing import ParamSpec + except ImportError: + from typing_extensions import ParamSpec # type: ignore[assignment] # These are deprecated and will go away in 2022. @@ -203,10 +210,16 @@ .. autofunction:: strtobool .. autofunction:: to_identifier -Sequence utilities ------------------- +Set-like functions for iterables +-------------------------------- + +These functions provide set-like operations on iterables. In contrast to +Python's built-in set type, they maintain the internal order of elements. .. autofunction:: unique +.. autofunction:: unique_difference +.. autofunction:: unique_intersection +.. autofunction:: unique_union Type Variables Used ------------------- @@ -1046,11 +1059,11 @@ def monkeypatch_class(_name, bases, namespace): # {{{ generic utilities def add_tuples(t1, t2): - return tuple([t1v + t2v for t1v, t2v in zip(t1, t2)]) + return tuple(t1v + t2v for t1v, t2v in zip(t1, t2)) def negate_tuple(t1): - return tuple([-t1v for t1v in t1]) + return tuple(-t1v for t1v in t1) def shift(vec, dist): @@ -1601,7 +1614,7 @@ def add_row(self, row: Tuple[Any, ...]) -> None: f"tried to add a row with {len(row)} columns to " f"a table with {self.ncolumns} columns") - self.rows.append(tuple([str(i) for i in row])) + self.rows.append(tuple(str(i) for i in row)) def _get_alignments(self) -> Tuple[str, ...]: # NOTE: If not all alignments were specified, extend alignments with the @@ -1611,9 +1624,9 @@ def _get_alignments(self) -> Tuple[str, ...]: ) def _get_column_widths(self, rows) -> Tuple[int, ...]: - return tuple([ + return tuple( max(len(row[i]) for row in rows) for i in range(self.ncolumns) - ]) + ) def __str__(self) -> str: """ @@ -1670,7 +1683,7 @@ def escape(cell: str) -> str: # Pipe symbols ('|') must be replaced return cell.replace("|", "\\|") - rows = [tuple([escape(cell) for cell in row]) for row in self.rows] + rows = [tuple(escape(cell) for cell in row) for row in self.rows] alignments = self._get_alignments() col_widths = self._get_column_widths(rows) @@ -1780,9 +1793,9 @@ def remove_columns(i, row): if i == 0 or skip_columns is None: return row else: - return tuple([ + return tuple( entry for i, entry in enumerate(row) if i not in skip_columns - ]) + ) alignments = sum(( remove_columns(i, tbl._get_alignments()) @@ -2252,7 +2265,7 @@ class UniqueNameGenerator: .. automethod:: __call__ """ def __init__(self, - existing_names: Optional[Set[str]] = None, + existing_names: Optional[Collection[str]] = None, forced_prefix: str = ""): """ Create a new :class:`UniqueNameGenerator`. @@ -2264,7 +2277,7 @@ def __init__(self, if existing_names is None: existing_names = set() - self.existing_names = existing_names.copy() + self.existing_names = set(existing_names) self.forced_prefix = forced_prefix self.prefix_to_counter: Dict[str, int] = {} @@ -2983,11 +2996,55 @@ def to_identifier(s: str) -> str: # {{{ unique -def unique(seq: Iterable[T]) -> Iterator[T]: - """Yield unique elements in *seq*, removing all duplicates. The internal +def unique(seq: Iterable[T]) -> Collection[T]: + """Return unique elements in *seq*, removing all duplicates. The internal order of the elements is preserved. See also :func:`itertools.groupby` (which removes consecutive duplicates).""" - return iter(dict.fromkeys(seq)) + return dict.fromkeys(seq) + + +def unique_difference(*args: Iterable[T]) -> Collection[T]: + r"""Return unique elements that are in the first iterable in *\*args* but not + in any of the others. The internal order of the elements is preserved.""" + if not args: + return [] + + res = dict.fromkeys(args[0]) + for seq in args[1:]: + for item in seq: + if item in res: + del res[item] + + return res + + +def unique_intersection(*args: Iterable[T]) -> Collection[T]: + r"""Return unique elements that are common to all iterables in *\*args*. + The internal order of the elements is preserved.""" + if not args: + return [] + + res = dict.fromkeys(args[0]) + for seq in args[1:]: + seq = set(seq) + res = {item: None for item in res if item in seq} + + return res + + +def unique_union(*args: Iterable[T]) -> Collection[T]: + r"""Return unique elements that are in any iterable in *\*args*. + The internal order of the elements is preserved.""" + if not args: + return [] + + res: Dict[T, None] = {} + for seq in args: + for item in seq: + if item not in res: + res[item] = None + + return res # }}} diff --git a/pytools/convergence.py b/pytools/convergence.py index aa85c201..7cf96925 100644 --- a/pytools/convergence.py +++ b/pytools/convergence.py @@ -67,7 +67,7 @@ def estimate_order_of_convergence(self, # NOTE: in case any of the errors are exactly 0.0, which # can give NaNs in `estimate_order_of_convergence` - emax = np.amax(errors) + emax: float = np.amax(errors) errors += (1 if emax == 0 else emax) * np.finfo(errors.dtype).eps size = len(abscissae) @@ -75,14 +75,15 @@ def estimate_order_of_convergence(self, gliding_mean = size data_points = size - gliding_mean + 1 - result = np.zeros((data_points, 2), float) + result: np.ndarray = np.zeros((data_points, 2), float) for i in range(data_points): result[i, 0], result[i, 1] = estimate_order_of_convergence( abscissae[i:i+gliding_mean], errors[i:i+gliding_mean]) return result def order_estimate(self) -> float: - return self.estimate_order_of_convergence()[0, 1] + from typing import cast + return cast(float, self.estimate_order_of_convergence()[0, 1]) def max_error(self) -> float: return max(err for absc, err in self.history) @@ -176,7 +177,7 @@ def stringify_eocs(*eocs: EOCRecorder, f"{len(eocs)} EOCRecorder instances") if names is None: - names = tuple([f"{error_label} {i}" for i in range(len(eocs))]) + names = tuple(f"{error_label} {i}" for i in range(len(eocs))) from pytools import merge_tables tbl = merge_tables(*[eoc._to_table( diff --git a/pytools/graph.py b/pytools/graph.py index 73c3ff31..84a566c3 100644 --- a/pytools/graph.py +++ b/pytools/graph.py @@ -69,6 +69,7 @@ from dataclasses import dataclass from typing import ( + TYPE_CHECKING, Any, Callable, Collection, @@ -88,10 +89,16 @@ ) -try: - from typing import TypeAlias -except ImportError: +if TYPE_CHECKING: + # NOTE: mypy seems to be confused by the `try.. except` below when called with + # python -m mypy --python-version 3.8 ... + # see https://github.com/python/mypy/issues/14220 from typing_extensions import TypeAlias +else: + try: + from typing import TypeAlias + except ImportError: + from typing_extensions import TypeAlias NodeT = TypeVar("NodeT", bound=Hashable) diff --git a/pytools/persistent_dict.py b/pytools/persistent_dict.py index 50e35072..146d2554 100644 --- a/pytools/persistent_dict.py +++ b/pytools/persistent_dict.py @@ -50,10 +50,23 @@ TypeVar, cast, ) +from warnings import warn -from siphash24 import siphash13 + +class RecommendedHashNotFoundWarning(UserWarning): + pass +try: + from siphash24 import siphash13 as _default_hash +except ImportError: + warn("Unable to import recommended hash 'siphash24.siphash13', " + "falling back to 'hashlib.sha256'. " + "Run 'python3 -m pip install siphash24' to install " + "the recommended hash.", + RecommendedHashNotFoundWarning, stacklevel=1) + from hashlib import sha256 as _default_hash + if TYPE_CHECKING: from _typeshed import ReadableBuffer from typing_extensions import Self @@ -68,6 +81,10 @@ logger = logging.getLogger(__name__) +# NOTE: not always available so they get hardcoded here +SQLITE_BUSY = getattr(sqlite3, "SQLITE_BUSY", 5) +SQLITE_CONSTRAINT_PRIMARYKEY = getattr(sqlite3, "SQLITE_CONSTRAINT_PRIMARYKEY", 1555) + __doc__ = """ Persistent Hashing and Persistent Dictionaries ============================================== @@ -161,7 +178,7 @@ class KeyBuilder: # this exists so that we can (conceivably) switch algorithms at some point # down the road - new_hash: Callable[..., Hash] = siphash13 + new_hash: Callable[..., Hash] = _default_hash def rec(self, key_hash: Hash, key: Any) -> Hash: """ @@ -429,7 +446,6 @@ class CollisionWarning(UserWarning): def __getattr__(name: str) -> Any: if name in ("NoSuchEntryInvalidKeyError", "NoSuchEntryInvalidContentsError"): - from warnings import warn warn(f"pytools.persistent_dict.{name} has been removed.", stacklevel=2) return NoSuchEntryError @@ -506,7 +522,6 @@ def __init__(self, # https://www.sqlite.org/pragma.html#pragma_synchronous if safe_sync is None or safe_sync: if safe_sync is None: - from warnings import warn warn(f"pytools.persistent_dict '{identifier}': " "enabling safe_sync as default. " "This provides strong protection against data loss, " @@ -531,7 +546,6 @@ def __del__(self) -> None: def _collision_check(self, key: K, stored_key: K) -> None: if stored_key != key: # Key collision, oh well. - from warnings import warn warn(f"{self.identifier}: key collision in cache at " f"'{self.container_dir}' -- these are sufficiently unlikely " "that they're often indicative of a broken hash key " @@ -568,10 +582,9 @@ def _exec_sql_fn(self, fn: Callable[[], T]) -> Optional[T]: except sqlite3.OperationalError as e: # If the database is busy, retry if (hasattr(e, "sqlite_errorcode") - and not e.sqlite_errorcode == sqlite3.SQLITE_BUSY): + and e.sqlite_errorcode != SQLITE_BUSY): raise if n % 20 == 0: - from warnings import warn warn(f"PersistentDict: database '{self.filename}' busy, {n} " "retries", stacklevel=3) else: @@ -716,7 +729,7 @@ def store(self, key: K, value: V, _skip_if_present: bool = False) -> None: self._exec_sql("INSERT INTO dict VALUES (?, ?)", (keyhash, v)) except sqlite3.IntegrityError as e: if hasattr(e, "sqlite_errorcode"): - if e.sqlite_errorcode == sqlite3.SQLITE_CONSTRAINT_PRIMARYKEY: + if e.sqlite_errorcode == SQLITE_CONSTRAINT_PRIMARYKEY: raise ReadOnlyEntryError("WriteOncePersistentDict, " "tried overwriting key") from e else: diff --git a/pytools/tag.py b/pytools/tag.py index 336df880..9055d8d4 100644 --- a/pytools/tag.py +++ b/pytools/tag.py @@ -39,7 +39,17 @@ ) from warnings import warn -from typing_extensions import Self, dataclass_transform + +if TYPE_CHECKING: + # NOTE: mypy seems to be confused by the `try.. except` below when called with + # python -m mypy --python-version 3.8 ... + # see https://github.com/python/mypy/issues/14220 + from typing_extensions import Self, dataclass_transform +else: + try: + from typing import Self, dataclass_transform + except ImportError: + from typing_extensions import Self, dataclass_transform from pytools import memoize, memoize_method @@ -90,7 +100,7 @@ class DottedName: .. automethod:: from_class """ - def __init__(self, name_parts: Tuple[str, ...]): + def __init__(self, name_parts: Tuple[str, ...]) -> None: if len(name_parts) == 0: raise ValueError("empty name parts") diff --git a/pytools/test/test_pytools.py b/pytools/test/test_pytools.py index 07b59a9c..c262fc91 100644 --- a/pytools/test/test_pytools.py +++ b/pytools/test/test_pytools.py @@ -497,7 +497,7 @@ def vectorized_add(self, ary): # }}} -def test_tag(): +def test_tag() -> None: from pytools.tag import ( NonUniqueTagError, Tag, @@ -552,7 +552,7 @@ class BestInClassRibbon(FairRibbon, UniqueTag): # a subclass of Tag with pytest.raises(TypeError): check_tag_uniqueness(frozenset(( - "I am not a tag", best_in_show_ribbon, + "I am not a tag", best_in_show_ribbon, # type: ignore[arg-type] blue_ribbon, red_ribbon))) # Test that instantiation succeeds if there are multiple instances @@ -583,7 +583,7 @@ class BestInClassRibbon(FairRibbon, UniqueTag): # Test that tagged() fails if tags are not a FrozenSet of Tags with pytest.raises(TypeError): - t1.tagged(tags=frozenset((1,))) + t1.tagged(tags=frozenset((1,))) # type: ignore[arg-type] # Test without_tags() function t4 = t2.without_tags(red_ribbon) @@ -766,7 +766,7 @@ class D: def test_unique(): - from pytools import unique + from pytools import unique, unique_difference, unique_intersection, unique_union assert list(unique([1, 2, 1])) == [1, 2] assert tuple(unique((1, 2, 1))) == (1, 2) @@ -774,14 +774,27 @@ def test_unique(): assert list(range(1000)) == list(unique(range(1000))) assert list(unique(list(range(1000)) + list(range(1000)))) == list(range(1000)) - assert next(unique([1, 2, 1, 3])) == 1 - assert next(unique([]), None) is None - # Also test strings since their ordering would be thrown off by # set-based 'unique' implementations. - assert list(unique(["A", "B", "A"])) == ["A", "B"] - assert tuple(unique(("A", "B", "A"))) == ("A", "B") - assert next(unique(["A", "B", "A", "C"])) == "A" + assert list(unique(["a", "b", "a"])) == ["a", "b"] + assert tuple(unique(("a", "b", "a"))) == ("a", "b") + + assert list(unique_difference(["a", "b", "c"], ["b", "c", "d"])) == ["a"] + assert list(unique_difference(["a", "b", "c"], ["a", "b", "c", "d"])) == [] + assert list(unique_difference(["a", "b", "c"], ["a"], ["b"], ["c"])) == [] + + assert list(unique_intersection(["a", "b", "a"], ["b", "c", "a"])) == ["a", "b"] + assert list(unique_intersection(["a", "b", "a"], ["d", "c", "e"])) == [] + + assert list(unique_union(["a", "b", "a"], ["b", "c", "b"])) == ["a", "b", "c"] + assert list(unique_union( + ["a", "b", "a"], ["b", "c", "b"], ["c", "d", "c"])) == ["a", "b", "c", "d"] + assert list(unique(["a", "b", "a"])) == \ + list(unique_union(["a", "b", "a"])) == ["a", "b"] + + assert list(unique_intersection()) == [] + assert list(unique_difference()) == [] + assert list(unique_union()) == [] # This class must be defined globally to be picklable diff --git a/typos.toml b/typos.toml deleted file mode 100644 index 9d260f95..00000000 --- a/typos.toml +++ /dev/null @@ -1,4 +0,0 @@ -[default] -extend-ignore-re = [ - "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$" -]