From 80c3bc1c941669a45152d3a8f96dbfb60c9fe520 Mon Sep 17 00:00:00 2001 From: Matthew Wardrop Date: Tue, 7 Jan 2025 10:20:52 -0800 Subject: [PATCH] Update typing annotations for Python 3.9+ --- benchmarks/plot.py | 2 +- docsite/docs/guides/integration.ipynb | 5 +- formulaic/formula.py | 44 ++++------ formulaic/materializers/arrow.py | 6 +- formulaic/materializers/base.py | 85 +++++++++---------- formulaic/materializers/pandas.py | 18 ++-- .../materializers/types/evaluated_factor.py | 4 +- .../materializers/types/factor_values.py | 17 ++-- formulaic/materializers/types/scoped_term.py | 5 +- formulaic/model_matrix.py | 5 +- formulaic/model_spec.py | 83 +++++++++--------- formulaic/parser/algos/sanitize_tokens.py | 4 +- formulaic/parser/algos/tokenize.py | 5 +- formulaic/parser/algos/tokens_to_ast.py | 13 +-- formulaic/parser/parser.py | 26 +++--- formulaic/parser/types/ast_node.py | 14 ++- formulaic/parser/types/factor.py | 5 +- formulaic/parser/types/formula_parser.py | 7 +- formulaic/parser/types/operator.py | 7 +- formulaic/parser/types/operator_resolver.py | 12 +-- formulaic/parser/types/ordered_set.py | 4 +- formulaic/parser/types/term.py | 5 +- formulaic/parser/types/token.py | 11 +-- formulaic/parser/utils.py | 13 +-- formulaic/sugar.py | 5 +- formulaic/transforms/basis_spline.py | 7 +- formulaic/transforms/contrasts.py | 23 ++--- formulaic/transforms/cubic_spline.py | 3 +- formulaic/transforms/hashed.py | 15 ++-- formulaic/transforms/patsy_compat.py | 5 +- formulaic/utils/calculus.py | 9 +- formulaic/utils/cast.py | 9 +- formulaic/utils/code.py | 3 +- formulaic/utils/constraints.py | 52 +++++------- formulaic/utils/context.py | 3 +- formulaic/utils/deprecations.py | 6 +- formulaic/utils/iterators.py | 5 +- formulaic/utils/layered_mapping.py | 16 ++-- formulaic/utils/null_handling.py | 25 +++--- formulaic/utils/sparse.py | 5 +- formulaic/utils/stateful_transforms.py | 11 +-- formulaic/utils/structured.py | 33 +++---- formulaic/utils/variables.py | 15 ++-- pyproject.toml | 2 + tests/parser/test_parser.py | 3 +- tests/parser/types/test_ordered_set.py | 2 +- 46 files changed, 309 insertions(+), 348 deletions(-) diff --git a/benchmarks/plot.py b/benchmarks/plot.py index 12a6a110..dd7c7bc5 100644 --- a/benchmarks/plot.py +++ b/benchmarks/plot.py @@ -24,7 +24,7 @@ def grouped_barplot(df, cat, subcat, val, err, subcats=None, **kwargs): x + offsets[i], dfg[val].values, width=width, - label="{}".format(gr), + label=f"{gr}", yerr=dfg[err].values, capsize=6, **kwargs, diff --git a/docsite/docs/guides/integration.ipynb b/docsite/docs/guides/integration.ipynb index 525a7ed9..c9bce27d 100644 --- a/docsite/docs/guides/integration.ipynb +++ b/docsite/docs/guides/integration.ipynb @@ -141,7 +141,8 @@ } ], "source": [ - "from typing import Iterable, List, Optional\n", + "from collections.abc import Iterable\n", + "from typing import Optional\n", "\n", "from sklearn.base import BaseEstimator, TransformerMixin\n", "from sklearn.linear_model import LinearRegression\n", @@ -181,7 +182,7 @@ "\n", " def get_feature_names_out(\n", " self, input_features: Optional[Iterable[str]] = None\n", - " ) -> List[str]:\n", + " ) -> list[str]:\n", " \"\"\"\n", " Expose model spec column names to scikit learn to allow column transforms later in the pipeline.\n", " \"\"\"\n", diff --git a/formulaic/formula.py b/formulaic/formula.py index b9e02b82..939ce58e 100644 --- a/formulaic/formula.py +++ b/formulaic/formula.py @@ -2,20 +2,12 @@ import sys from abc import ABCMeta, abstractmethod -from collections.abc import MutableSequence +from collections.abc import Generator, Iterable, Mapping, MutableSequence from enum import Enum from typing import ( Any, Callable, - Dict, - Generator, - Iterable, - List, - Mapping, Optional, - Set, - Tuple, - Type, TypeVar, Union, cast, @@ -38,10 +30,10 @@ FormulaSpec: TypeAlias = Union[ "Formula", str, - List[Union[str, Term]], - Set[Union[str, Term]], - Dict[str, "FormulaSpec"], - Tuple["FormulaSpec", ...], + list[Union[str, Term]], + set[Union[str, Term]], + dict[str, "FormulaSpec"], + tuple["FormulaSpec", ...], Structured["FormulaSpec"], ] _SelfType = TypeVar("_SelfType", bound="Formula") @@ -281,7 +273,7 @@ def get_model_matrix( self, data: Any, context: Optional[Mapping[str, Any]] = None, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **spec_overrides: Any, ) -> Union[ModelMatrix, Structured[ModelMatrix]]: """ @@ -302,7 +294,7 @@ def get_model_matrix( @property @abstractmethod - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required to be in the data to materialize this formula. @@ -504,7 +496,7 @@ def get_model_matrix( self, data: Any, context: Optional[Mapping[str, Any]] = None, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **spec_overrides: Any, ) -> Union[ModelMatrix, Structured[ModelMatrix]]: """ @@ -529,7 +521,7 @@ def get_model_matrix( ) @property - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required in the data order to materialize this formula. @@ -542,7 +534,7 @@ def required_variables(self) -> Set[Variable]: evaluation context rather than the data context. """ - variables: List[Variable] = [ + variables: list[Variable] = [ variable for term in self.__terms for factor in term.factors @@ -605,11 +597,11 @@ def _map( self, func: Union[ Callable[[SimpleFormula], Any], - Callable[[SimpleFormula, Tuple[Union[str, int], ...]], Any], + Callable[[SimpleFormula, tuple[Union[str, int], ...]], Any], ], recurse: bool = True, - as_type: Optional[Type[Structured]] = None, - _context: Tuple[Union[str, int], ...] = (), + as_type: Optional[type[Structured]] = None, + _context: tuple[Union[str, int], ...] = (), ) -> Any: try: return func(self, ()) # type: ignore @@ -629,7 +621,7 @@ def _flatten(self) -> Generator[SimpleFormula, None, None]: as_of=(1, 1), removed_in=(2, 0), ) - def _to_dict(self) -> Dict[str, SimpleFormula]: + def _to_dict(self) -> dict[str, SimpleFormula]: return {"root": self} @deprecated( @@ -726,7 +718,7 @@ def get_model_matrix( self, data: Any, context: Optional[Mapping[str, Any]] = None, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **spec_overrides: Any, ) -> Union[ModelMatrix, Structured[ModelMatrix]]: """ @@ -751,7 +743,7 @@ def get_model_matrix( ) @property - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required in the data order to materialize this formula. @@ -764,7 +756,7 @@ def required_variables(self) -> Set[Variable]: evaluation context rather than the data context. """ - variables: List[Variable] = [] + variables: list[Variable] = [] # Recurse through formula to collect all variables self._map( @@ -797,7 +789,7 @@ def differentiate( # pylint: disable=redefined-builtin ) # Ensure pickling never includes context - def __getstate__(self) -> Tuple[None, Dict[str, Any]]: + def __getstate__(self) -> tuple[None, dict[str, Any]]: slots = self.__slots__ + Structured.__slots__ return ( None, diff --git a/formulaic/materializers/arrow.py b/formulaic/materializers/arrow.py index 5c94a9da..fcddf7f3 100644 --- a/formulaic/materializers/arrow.py +++ b/formulaic/materializers/arrow.py @@ -1,7 +1,7 @@ from __future__ import annotations -from collections.abc import Mapping -from typing import TYPE_CHECKING, Any, Dict, Iterator, Sequence +from collections.abc import Iterator, Mapping, Sequence +from typing import TYPE_CHECKING, Any import pandas from interface_meta import override @@ -30,7 +30,7 @@ class LazyArrowTableProxy(Mapping): def __init__(self, table: pyarrow.Table): self.table = table self.column_names = set(self.table.column_names) - self._cache: Dict[str, pandas.Series] = {} + self._cache: dict[str, pandas.Series] = {} self.index = pandas.RangeIndex(len(table)) def __contains__(self, value: Any) -> Any: diff --git a/formulaic/materializers/base.py b/formulaic/materializers/base.py index b6a45aa9..a2d599c3 100644 --- a/formulaic/materializers/base.py +++ b/formulaic/materializers/base.py @@ -7,20 +7,11 @@ import operator from abc import abstractmethod from collections import defaultdict, namedtuple +from collections.abc import Generator, Hashable, Iterable, Mapping, Sequence from typing import ( TYPE_CHECKING, Any, - Dict, - Generator, - Hashable, - Iterable, - List, - Mapping, Optional, - Sequence, - Set, - Tuple, - Type, Union, cast, ) @@ -58,8 +49,8 @@ class FormulaMaterializerMeta(InterfaceMeta): INTERFACE_RAISE_ON_VIOLATION = True - REGISTERED_NAMES: Dict[str, Type[FormulaMaterializer]] = {} - REGISTERED_INPUTS: Dict[str, List[Type[FormulaMaterializer]]] = defaultdict(list) + REGISTERED_NAMES: dict[str, type[FormulaMaterializer]] = {} + REGISTERED_INPUTS: dict[str, list[type[FormulaMaterializer]]] = defaultdict(list) def __register_implementation__(cls) -> None: if "REGISTER_NAME" in cls.__dict__ and cls.REGISTER_NAME: @@ -74,8 +65,8 @@ def __register_implementation__(cls) -> None: ) def for_materializer( - cls, materializer: Union[str, FormulaMaterializer, Type[FormulaMaterializer]] - ) -> Type[FormulaMaterializer]: + cls, materializer: Union[str, FormulaMaterializer, type[FormulaMaterializer]] + ) -> type[FormulaMaterializer]: if isinstance(materializer, str): if materializer not in cls.REGISTERED_NAMES: raise FormulaMaterializerNotFoundError(materializer) @@ -90,7 +81,7 @@ def for_materializer( ) return materializer - def for_data(cls, data: Any, output: Hashable = None) -> Type[FormulaMaterializer]: + def for_data(cls, data: Any, output: Hashable = None) -> type[FormulaMaterializer]: datacls = data.__class__ input_type = f"{datacls.__module__}.{datacls.__qualname__}" @@ -106,7 +97,7 @@ def for_data(cls, data: Any, output: Hashable = None) -> Type[FormulaMaterialize if output in materializer.REGISTER_OUTPUTS: return materializer - output_types: Set[Hashable] = set( + output_types: set[Hashable] = set( *itertools.chain( materializer.REGISTER_OUTPUTS for materializer in cls.REGISTERED_INPUTS[input_type] @@ -140,8 +131,8 @@ def __init__( LayeredMapping(TRANSFORMS, name="transforms"), ) - self.factor_cache: Dict[str, EvaluatedFactor] = {} - self.encoded_cache: Dict[Union[str, Tuple[str, bool]], Any] = {} + self.factor_cache: dict[str, EvaluatedFactor] = {} + self.encoded_cache: dict[Union[str, tuple[str, bool]], Any] = {} def _init(self) -> None: pass # pragma: no cover @@ -157,7 +148,7 @@ def nrows(self) -> int: def get_model_matrix( self, spec: Union[FormulaSpec, ModelMatrix, ModelMatrices, ModelSpec, ModelSpecs], - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **spec_overrides: Any, ) -> Union[ModelMatrix, ModelMatrices]: from formulaic import ModelSpec @@ -178,7 +169,7 @@ def get_model_matrix( # Step 1: Evaluate all factors and cache the results, keeping track of # which rows need dropping (if `self.config.na_action == 'drop'`). - drop_rows: Set[int] = drop_rows if drop_rows is not None else set() + drop_rows: set[int] = drop_rows if drop_rows is not None else set() for factor in factors: self._evaluate_factor(factor, factor_evaluation_model_spec, drop_rows) drop_rows: Sequence[int] = sorted(drop_rows) @@ -220,7 +211,7 @@ def _build_model_matrix( # (reusing pre-generated structure if it is available) if spec.structure: scoped_terms_for_terms: Generator[ - Tuple[Term, Iterable[ScopedTerm]], None, None + tuple[Term, Iterable[ScopedTerm]], None, None ] = ( (s.term, [st.rehydrate(self.factor_cache) for st in s.scoped_terms]) for s in spec.structure @@ -297,7 +288,7 @@ def _prepare_model_specs(self, spec: Union[ModelSpec, ModelSpecs]) -> ModelSpecs spec = ModelSpecs(spec) def prepare_model_spec(model_spec: ModelSpec) -> ModelSpec: - overrides: Dict[str, Any] = { + overrides: dict[str, Any] = { "materializer": self.REGISTER_NAME, "materializer_params": self.params, } @@ -315,13 +306,13 @@ def prepare_model_spec(model_spec: ModelSpec) -> ModelSpec: def _prepare_factor_evaluation_model_spec( self, model_specs: ModelSpecs - ) -> Tuple[Set[Factor], ModelSpec]: + ) -> tuple[set[Factor], ModelSpec]: from formulaic.model_spec import ModelSpec output = set() na_action = set() ensure_full_rank = set() - factors: Set[Factor] = set() + factors: set[Factor] = set() transform_state = {} def update_pooled_spec(model_spec: ModelSpec) -> None: @@ -376,7 +367,7 @@ def _cluster_terms( def _get_scoped_terms( self, terms: Iterable[Term], ensure_full_rank: bool = True - ) -> Generator[Tuple[Term, Iterable[ScopedTerm]], None, None]: + ) -> Generator[tuple[Term, Iterable[ScopedTerm]], None, None]: """ Generate the terms to be used in the model matrix. @@ -395,7 +386,7 @@ def _get_scoped_terms( Returns: list: A list of appropriately scoped terms. """ - spanned: Set[ScopedTerm] = set() + spanned: set[ScopedTerm] = set() for term in terms: evaled_factors = [ @@ -454,7 +445,7 @@ def _get_scoped_terms_spanned_by_evaled_factors( The scoped terms for the nominated `evaled_factors`. """ scale = 1 - factors: List[Tuple[Union[ScopedFactor, int], ...]] = [] + factors: list[tuple[Union[ScopedFactor, int], ...]] = [] for factor in evaled_factors: if factor.metadata.kind is Factor.Kind.CONSTANT: scale *= factor.values @@ -525,7 +516,7 @@ def _simplify_scoped_terms( # Methods related to looking-up, evaluating and encoding terms and factors def _evaluate_factor( - self, factor: Factor, spec: ModelSpec, drop_rows: Set[int] + self, factor: Factor, spec: ModelSpec, drop_rows: set[int] ) -> EvaluatedFactor: if factor.expr not in self.factor_cache: try: @@ -595,7 +586,7 @@ def _evaluate_factor( ) return self.factor_cache[factor.expr] - def _lookup(self, name: str) -> Tuple[Any, Set[Variable]]: + def _lookup(self, name: str) -> tuple[Any, set[Variable]]: sentinel = object() values, layer = self.layered_context.get_with_layer_name(name, default=sentinel) if values is sentinel: @@ -606,8 +597,8 @@ def _lookup(self, name: str) -> Tuple[Any, Set[Variable]]: def _evaluate( self, expr: str, metadata: Any, spec: ModelSpec - ) -> Tuple[Any, Set[Variable]]: - variables: Set[Variable] = set() + ) -> tuple[Any, set[Variable]]: + variables: set[Variable] = set() return ( stateful_eval( expr, @@ -626,7 +617,7 @@ def _is_categorical(self, values: Any) -> bool: return False def _check_for_nulls( - self, name: str, values: Any, na_action: NAAction, drop_rows: Set[int] + self, name: str, values: Any, na_action: NAAction, drop_rows: set[int] ) -> None: pass # pragma: no cover @@ -636,7 +627,7 @@ def _encode_evaled_factor( spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: if not factor.metadata.encoded: if factor.expr in self.encoded_cache: encoded = self.encoded_cache[factor.expr] @@ -656,7 +647,7 @@ def map_dict(f: Any) -> Any: def wrapped( values: Any, metadata: Any, - state: Dict[str, Any], + state: dict[str, Any], *args: Any, **kwargs: Any, ) -> Any: @@ -681,7 +672,7 @@ def wrapped( return wrapped - encoder_state: Dict[str, Any] = spec.encoder_state.get( + encoder_state: dict[str, Any] = spec.encoder_state.get( factor.expr, [None, {}] )[1] @@ -736,7 +727,7 @@ def wrapped( # Only encode once for encodings where we can just drop a field # later on below. - cache_key: Union[str, Tuple[str, bool]] = ( + cache_key: Union[str, tuple[str, bool]] = ( factor.expr if isinstance(encoded, dict) and factor.metadata.drop_field else (factor.expr, reduced_rank) @@ -770,7 +761,7 @@ def wrapped( def _extract_columns_for_encoding( self, factor: EvaluatedFactor - ) -> Union[Any, Dict[str, Any]]: + ) -> Union[Any, dict[str, Any]]: """ If incoming factor has values that need to be unpacked into columns (e.g. a two-dimensions numpy array), do that expansion here. Otherwise, @@ -780,7 +771,7 @@ def _extract_columns_for_encoding( def _flatten_encoded_evaled_factor( self, name: str, values: FactorValues[dict] - ) -> Dict[str, Any]: + ) -> dict[str, Any]: if not isinstance(values, dict): return {name: values} @@ -808,7 +799,7 @@ def _encode_constant( self, value: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], ) -> Any: @@ -819,7 +810,7 @@ def _encode_categorical( self, values: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False, @@ -831,7 +822,7 @@ def _encode_numerical( self, values: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], ) -> Any: @@ -841,12 +832,12 @@ def _encode_numerical( def _enforce_structure( self, - cols: List[Tuple[Term, Iterable[ScopedTerm], Dict[str, Any]]], + cols: list[tuple[Term, Iterable[ScopedTerm], dict[str, Any]]], spec: ModelSpec, drop_rows: Sequence[int], - ) -> Generator[Tuple[Term, Iterable[ScopedTerm], Dict[str, Any]], None, None]: + ) -> Generator[tuple[Term, Iterable[ScopedTerm], dict[str, Any]], None, None]: # TODO: Verify that imputation strategies are intuitive and make sense. - structure = cast(List[EncodedTermStructure], spec.structure) + structure = cast(list[EncodedTermStructure], spec.structure) if not len(cols) == len(structure): # pragma: no cover raise RuntimeError( "Specification structure and columns are mismatched. Please report this error with examples!" @@ -880,8 +871,8 @@ def _enforce_structure( ) def _get_columns_for_term( - self, factors: List[Dict[str, Any]], spec: ModelSpec, scale: float = 1 - ) -> Dict[str, Any]: + self, factors: list[dict[str, Any]], spec: ModelSpec, scale: float = 1 + ) -> dict[str, Any]: """ Assemble the columns for a model matrix given factors and a scale. @@ -908,6 +899,6 @@ def _get_columns_for_term( @abstractmethod def _combine_columns( - self, cols: Sequence[Tuple[str, Any]], spec: ModelSpec, drop_rows: Sequence[int] + self, cols: Sequence[tuple[str, Any]], spec: ModelSpec, drop_rows: Sequence[int] ) -> Any: pass # pragma: no cover diff --git a/formulaic/materializers/pandas.py b/formulaic/materializers/pandas.py index ee263376..e950c160 100644 --- a/formulaic/materializers/pandas.py +++ b/formulaic/materializers/pandas.py @@ -2,8 +2,8 @@ import functools import itertools -from collections.abc import Mapping -from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Set, Tuple, cast +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING, Any, cast import numpy import pandas @@ -51,7 +51,7 @@ def _is_categorical(self, values: Any) -> bool: @override def _check_for_nulls( - self, name: str, values: Any, na_action: NAAction, drop_rows: Set[int] + self, name: str, values: Any, na_action: NAAction, drop_rows: set[int] ) -> None: if na_action is NAAction.IGNORE: return @@ -80,7 +80,7 @@ def _encode_constant( self, value: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], ) -> Any: @@ -95,7 +95,7 @@ def _encode_numerical( self, values: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], ) -> Any: @@ -112,7 +112,7 @@ def _encode_categorical( self, values: Any, metadata: Any, - encoder_state: Dict[str, Any], + encoder_state: dict[str, Any], spec: ModelSpec, drop_rows: Sequence[int], reduced_rank: bool = False, @@ -136,8 +136,8 @@ def _encode_categorical( @override def _get_columns_for_term( - self, factors: List[Dict[str, Any]], spec: ModelSpec, scale: float = 1 - ) -> Dict[str, Any]: + self, factors: list[dict[str, Any]], spec: ModelSpec, scale: float = 1 + ) -> dict[str, Any]: out = {} names = [ @@ -190,7 +190,7 @@ def _get_columns_for_term( @override def _combine_columns( - self, cols: Sequence[Tuple[str, Any]], spec: ModelSpec, drop_rows: Sequence[int] + self, cols: Sequence[tuple[str, Any]], spec: ModelSpec, drop_rows: Sequence[int] ) -> pandas.DataFrame: # If we are outputing a pandas DataFrame, explicitly override index # in case transforms/etc have lost track of it. diff --git a/formulaic/materializers/types/evaluated_factor.py b/formulaic/materializers/types/evaluated_factor.py index 5d222d1e..b94c1d23 100644 --- a/formulaic/materializers/types/evaluated_factor.py +++ b/formulaic/materializers/types/evaluated_factor.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, replace -from typing import Any, Optional, Set +from typing import Any, Optional from formulaic.parser.types import Factor from formulaic.utils.variables import Variable @@ -27,7 +27,7 @@ class EvaluatedFactor: factor: Factor values: FactorValues[Any] - variables: Optional[Set[Variable]] = None + variables: Optional[set[Variable]] = None @property def expr(self) -> str: diff --git a/formulaic/materializers/types/factor_values.py b/formulaic/materializers/types/factor_values.py index 4c1c2894..6d687018 100644 --- a/formulaic/materializers/types/factor_values.py +++ b/formulaic/materializers/types/factor_values.py @@ -1,17 +1,14 @@ from __future__ import annotations import copy +from collections.abc import Hashable from dataclasses import dataclass, replace from typing import ( TYPE_CHECKING, Any, Callable, - Dict, Generic, - Hashable, - List, Optional, - Tuple, TypeVar, Union, ) @@ -65,11 +62,11 @@ class FactorValuesMetadata: """ kind: Factor.Kind = Factor.Kind.UNKNOWN - column_names: Optional[Tuple[str]] = None + column_names: Optional[tuple[str]] = None format: str = "{name}[{field}]" encoded: bool = False encoder: Optional[ - Callable[[Any, bool, List[int], Dict[str, Any], ModelSpec], Any] + Callable[[Any, bool, list[int], dict[str, Any], ModelSpec], Any] ] = None # Rank-Reduction Attributes @@ -106,12 +103,12 @@ def __init__( metadata: Union[FactorValuesMetadata, MissingType] = MISSING, *, kind: Union[str, Factor.Kind, MissingType] = MISSING, - column_names: Union[Tuple[Hashable, ...], MissingType] = MISSING, + column_names: Union[tuple[Hashable, ...], MissingType] = MISSING, format: Union[str, MissingType] = MISSING, # pylint: disable=redefined-builtin encoded: Union[bool, MissingType] = MISSING, encoder: Union[ None, - Callable[[Any, bool, List[int], Dict[str, Any], ModelSpec], Any], + Callable[[Any, bool, list[int], dict[str, Any], ModelSpec], Any], MissingType, ] = MISSING, spans_intercept: Union[bool, MissingType] = MISSING, @@ -168,8 +165,8 @@ def __deepcopy__(self, memo: Any = None) -> FactorValues[T]: def __reduce_ex__( self, protocol: SupportsIndex - ) -> Tuple[ + ) -> tuple[ Callable[[Any, Union[FactorValuesMetadata, MissingType]], FactorValues], - Tuple[Any, Union[FactorValuesMetadata, MissingType]], + tuple[Any, Union[FactorValuesMetadata, MissingType]], ]: return FactorValues, (self.__wrapped__, self._self_metadata) diff --git a/formulaic/materializers/types/scoped_term.py b/formulaic/materializers/types/scoped_term.py index 1912b0e5..83836635 100644 --- a/formulaic/materializers/types/scoped_term.py +++ b/formulaic/materializers/types/scoped_term.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import Any, Iterable, Mapping, Set +from collections.abc import Iterable, Mapping +from typing import Any from formulaic.materializers.types.evaluated_factor import EvaluatedFactor from formulaic.utils.variables import Variable @@ -87,7 +88,7 @@ def rehydrate(self, factor_values: Mapping[str, EvaluatedFactor]) -> ScopedTerm: ) @property - def variables(self) -> Set[Variable]: + def variables(self) -> set[Variable]: return Variable.union( *( factor.factor.variables diff --git a/formulaic/model_matrix.py b/formulaic/model_matrix.py index e563a578..b203375d 100644 --- a/formulaic/model_matrix.py +++ b/formulaic/model_matrix.py @@ -8,7 +8,6 @@ Generic, Optional, SupportsIndex, - Tuple, TypeVar, cast, ) @@ -69,8 +68,8 @@ def __deepcopy__(self, memo: Any = None) -> ModelMatrix[MatrixType]: def __reduce_ex__( self, protocol: SupportsIndex - ) -> Tuple[ - Callable[[Any, ModelSpec], ModelMatrix], Tuple[Any, Optional[ModelSpec]] + ) -> tuple[ + Callable[[Any, ModelSpec], ModelMatrix], tuple[Any, Optional[ModelSpec]] ]: return ModelMatrix, (self.__wrapped__, self._self_model_spec) diff --git a/formulaic/model_spec.py b/formulaic/model_spec.py index 66ce16a3..b370d765 100644 --- a/formulaic/model_spec.py +++ b/formulaic/model_spec.py @@ -1,17 +1,12 @@ from __future__ import annotations from collections import defaultdict +from collections.abc import Mapping, Sequence from dataclasses import dataclass, field, replace from typing import ( TYPE_CHECKING, Any, - Dict, - List, - Mapping, Optional, - Sequence, - Set, - Tuple, Union, cast, ) @@ -116,16 +111,16 @@ def prepare_model_spec(obj: Any) -> Union[ModelSpec, ModelSpecs]: # Configuration attributes formula: SimpleFormula materializer: Optional[str] = None - materializer_params: Optional[Dict[str, Any]] = None + materializer_params: Optional[dict[str, Any]] = None ensure_full_rank: bool = True na_action: NAAction = NAAction.DROP output: Optional[str] = None cluster_by: ClusterBy = ClusterBy.NONE # State attributes - structure: Optional[List[EncodedTermStructure]] = None - transform_state: Dict = field(default_factory=dict) - encoder_state: Dict = field(default_factory=dict) + structure: Optional[list[EncodedTermStructure]] = None + transform_state: dict = field(default_factory=dict) + encoder_state: dict = field(default_factory=dict) def __post_init__(self) -> None: self.__dict__["formula"] = SimpleFormula.from_spec(self.formula) @@ -148,7 +143,7 @@ def __post_init__(self) -> None: # Derived features @property - def __structure(self) -> List[EncodedTermStructure]: + def __structure(self) -> list[EncodedTermStructure]: """ A reference to `.structure` if it is populated, or otherwise an exception is raised. @@ -169,14 +164,14 @@ def column_names(self) -> Sequence[str]: return tuple(feature for row in self.__structure for feature in row.columns) @cached_property - def column_indices(self) -> Dict[str, int]: + def column_indices(self) -> dict[str, int]: """ An ordered mapping from column names to the column index in generated model matrices. """ return {name: i for i, name in enumerate(self.column_names)} - def get_column_indices(self, columns: Union[str, Sequence[str]]) -> List[int]: + def get_column_indices(self, columns: Union[str, Sequence[str]]) -> list[int]: """ Generate a list of column indices corresponding to the nominated column names. This is useful when you want to slice a model matrix by specific @@ -190,7 +185,7 @@ def get_column_indices(self, columns: Union[str, Sequence[str]]) -> List[int]: return [self.column_indices[column] for column in columns] @property - def terms(self) -> List[Term]: + def terms(self) -> list[Term]: """ The terms used to generate model matrices from this `ModelSpec` instance. @@ -198,7 +193,7 @@ def terms(self) -> List[Term]: return list(self.formula) @cached_property - def term_indices(self) -> Dict[Term, List[int]]: + def term_indices(self) -> dict[Term, list[int]]: """ An ordered mapping of `Term` instances to the generated column indices. @@ -216,7 +211,7 @@ def term_indices(self) -> Dict[Term, List[int]]: def get_term_indices( self, terms_spec: FormulaSpec, **formula_kwargs: Any - ) -> List[int]: + ) -> list[int]: """ Generate a list of column indices corresponding to the columns associated with the nominated `term_spec`. @@ -240,13 +235,13 @@ def get_term_indices( formula_kwargs: Additional keyword arguments to pass to the `Formula.from_spec` constructor to control (e.g.) ordering. """ - terms: List[Term] = list( + terms: list[Term] = list( self.__get_restricted_formula(terms_spec, **formula_kwargs) ) return [idx for term in terms for idx in self.term_indices[term]] @cached_property - def term_slices(self) -> Dict[Term, slice]: + def term_slices(self) -> dict[Term, slice]: """ An ordered mapping of `Term` instances to a slice that when used on the columns of the model matrix will subsample the model matrix down to @@ -262,19 +257,19 @@ def term_slices(self) -> Dict[Term, slice]: } @cached_property - def term_factors(self) -> Dict[Term, Set[Factor]]: + def term_factors(self) -> dict[Term, set[Factor]]: """ A mapping from `Term` instances to the factors which were used to generate them. """ - term_factors: Dict[Term, Set[Factor]] = defaultdict(set) + term_factors: dict[Term, set[Factor]] = defaultdict(set) for term in self.terms: for factor in term.factors: term_factors[term].add(factor) return dict(term_factors) @cached_property - def term_variables(self) -> Dict[Term, Set[Variable]]: + def term_variables(self) -> dict[Term, set[Variable]]: """ An ordered mapping of `Term` instances to the set of `Variable` instances corresponding to the variables used in the evaluation of that @@ -289,7 +284,7 @@ def term_variables(self) -> Dict[Term, Set[Variable]]: return term_variables @cached_property - def factors(self) -> Set[Factor]: + def factors(self) -> set[Factor]: """ The factors used to generate model matrices from this `ModelSpec` instance. @@ -297,24 +292,24 @@ def factors(self) -> Set[Factor]: return {factor for term in self.terms for factor in term.factors} @cached_property - def factor_terms(self) -> Dict[Factor, Set[Term]]: + def factor_terms(self) -> dict[Factor, set[Term]]: """ A mapping from `Factor` instances to the terms which used it. This is the reverse mapping of `.term_factors`. """ - factor_terms: Dict[Factor, Set[Term]] = defaultdict(set) + factor_terms: dict[Factor, set[Term]] = defaultdict(set) for term, factors in self.term_factors.items(): for factor in factors: factor_terms[factor].add(term) return dict(factor_terms) @cached_property - def factor_variables(self) -> Dict[Factor, Set[Variable]]: + def factor_variables(self) -> dict[Factor, set[Variable]]: """ A mapping from `Factor` instances to the variables used in the evaluation of that factor. """ - factor_variables: Dict[Factor, List[Variable]] = defaultdict(list) + factor_variables: dict[Factor, list[Variable]] = defaultdict(list) for s in self.__structure: for scoped_term in s.scoped_terms: for scoped_factor in scoped_term.factors: @@ -328,7 +323,7 @@ def factor_variables(self) -> Dict[Factor, Set[Variable]]: } @cached_property - def factor_contrasts(self) -> Dict[Factor, ContrastsState]: + def factor_contrasts(self) -> dict[Factor, ContrastsState]: """ A mapping of `Factor` instances to their contrasts state. This is useful if you would like to introspect some of the coding choices, or reuse @@ -358,7 +353,7 @@ def factor_contrasts(self) -> Dict[Factor, ContrastsState]: } @cached_property - def variables(self) -> Set[Variable]: + def variables(self) -> set[Variable]: """ The variables used during the materialization of the entire formula. """ @@ -367,19 +362,19 @@ def variables(self) -> Set[Variable]: ) @cached_property - def variable_terms(self) -> Dict[Variable, Set[Term]]: + def variable_terms(self) -> dict[Variable, set[Term]]: """ A mapping from `Variable` instances to the terms which used it. This is the reverse mapping of `.term_variables`. """ - variable_terms: Dict[Variable, Set[Term]] = defaultdict(set) + variable_terms: dict[Variable, set[Term]] = defaultdict(set) for term, variables in self.term_variables.items(): for variable in variables: variable_terms[variable].add(term) return dict(variable_terms) @cached_property - def variable_indices(self) -> Dict[Variable, List[int]]: + def variable_indices(self) -> dict[Variable, list[int]]: """ A mapping from `Variable` instances to the indices in the model matrix where they were used. @@ -393,7 +388,7 @@ def variable_indices(self) -> Dict[Variable, List[int]]: def get_variable_indices( self, variables: Sequence[Union[str, Variable]] - ) -> List[int]: + ) -> list[int]: """ Generate a list of column indices corresponding to the columns associated with the nominated variables. This is useful when you want to slice a model @@ -410,19 +405,19 @@ def get_variable_indices( ] @cached_property - def variables_by_source(self) -> Dict[Optional[str], Set[Variable]]: + def variables_by_source(self) -> dict[Optional[str], set[Variable]]: """ A mapping of source name to the set of variables drawn from that source. Formulaic, by default, has three top-level sources of variables: 'data', 'transforms', and 'context'. """ - variables_by_source: Dict[Optional[str], Set[Variable]] = defaultdict(set) + variables_by_source: dict[Optional[str], set[Variable]] = defaultdict(set) for variable in self.variables: variables_by_source[variable.source].add(variable) return dict(variables_by_source) @property - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required to be in the data to materialize this model specification. @@ -500,7 +495,7 @@ def get_model_matrix( self, data: Any, context: Optional[Mapping[str, Any]] = None, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **attr_overrides: Any, ) -> ModelMatrix: """ @@ -585,8 +580,8 @@ def subset(self, terms_spec: FormulaSpec, **formula_kwargs: Any) -> ModelSpec: formula: SimpleFormula = self.__get_restricted_formula( terms_spec, **formula_kwargs ) - terms: List[Term] = list(formula) - terms_set: Set[Term] = set(terms) + terms: list[Term] = list(formula) + terms_set: set[Term] = set(terms) term_structure = {s.term: s for s in self.__structure if s.term in terms_set} return self.update( @@ -615,7 +610,7 @@ def differentiate(self, *wrt: str, use_sympy: bool = False) -> ModelSpec: ) # Only include dataclass fields when pickling. - def __getstate__(self) -> Dict[str, Any]: + def __getstate__(self) -> dict[str, Any]: return { k: v for k, v in self.__dict__.items() if k in self.__dataclass_fields__ } @@ -643,7 +638,7 @@ def __get_restricted_formula( "Cannot subset a `ModelSpec` using a formula that has structure." ) - missing_terms: Set[Term] = set(formula).difference(self.terms) + missing_terms: set[Term] = set(formula).difference(self.terms) if missing_terms: raise ValueError( f"Cannot subset a model spec with terms not present in the original model spec: {missing_terms}." @@ -669,12 +664,12 @@ def _prepare_item(self, key: str, item: Any) -> Any: return item @property - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required to be in the data to materialize all of the model specifications in this `ModelSpecs` instance. """ - variables: Set[Variable] = set() + variables: set[Variable] = set() self._map(lambda ms: variables.update(ms.required_variables)) return variables @@ -682,7 +677,7 @@ def get_model_matrix( self, data: Any, context: Optional[Mapping[str, Any]] = None, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **attr_overrides: Any, ) -> ModelMatrices: """ @@ -768,7 +763,7 @@ def subset(self, terms_spec: FormulaSpec) -> ModelSpecs: """ def map_formula_structure_onto_model_spec( - formula: SimpleFormula, context: Tuple[Union[int, str], ...] + formula: SimpleFormula, context: tuple[Union[int, str], ...] ) -> ModelSpec: try: return self[context].subset(formula) diff --git a/formulaic/parser/algos/sanitize_tokens.py b/formulaic/parser/algos/sanitize_tokens.py index 13336150..94ec457e 100644 --- a/formulaic/parser/algos/sanitize_tokens.py +++ b/formulaic/parser/algos/sanitize_tokens.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterable +from collections.abc import Iterable from formulaic.utils.code import format_expr, sanitize_variable_names @@ -27,7 +27,7 @@ def sanitize_python_code(expr: str) -> str: Ensure than python code is consistently formatted, and that quoted portions (by backticks) are properly handled. """ - aliases: Dict[str, str] = {} + aliases: dict[str, str] = {} expr = format_expr( sanitize_variable_names(expr, {}, aliases, template="_formulaic_{}") ) diff --git a/formulaic/parser/algos/tokenize.py b/formulaic/parser/algos/tokenize.py index 1d08526d..fbd4f75d 100644 --- a/formulaic/parser/algos/tokenize.py +++ b/formulaic/parser/algos/tokenize.py @@ -1,5 +1,6 @@ import re -from typing import Iterable, List, Pattern +from collections.abc import Iterable +from re import Pattern from ..types import Token from ..utils import exc_for_token @@ -53,7 +54,7 @@ def tokenize( A generator over the tokens found in the formula string. """ - quote_context: List[str] = [] + quote_context: list[str] = [] take = 0 token = Token(source=formula) diff --git a/formulaic/parser/algos/tokens_to_ast.py b/formulaic/parser/algos/tokens_to_ast.py index 6631fb00..23c430a9 100644 --- a/formulaic/parser/algos/tokens_to_ast.py +++ b/formulaic/parser/algos/tokens_to_ast.py @@ -1,5 +1,6 @@ from collections import namedtuple -from typing import Iterable, List, Set, Union +from collections.abc import Iterable +from typing import Union from ..types import ASTNode, Operator, OperatorResolver, Token from ..utils import exc_for_missing_operator, exc_for_token @@ -40,16 +41,16 @@ def tokens_to_ast( Returns: The generated abstract syntax tree as a nested `ASTNode` instance. """ - output_queue: List[Union[Token, ASTNode]] = [] - operator_stack: List[OrderedOperator] = [] - disabled_operators: Set[Token] = set() + output_queue: list[Union[Token, ASTNode]] = [] + operator_stack: list[OrderedOperator] = [] + disabled_operators: set[Token] = set() def stack_operator(operator: Union[Token, Operator], token: Token) -> None: operator_stack.append(OrderedOperator(operator, token, len(output_queue))) def operate( - ordered_operator: OrderedOperator, output_queue: List[Union[Token, ASTNode]] - ) -> List[Union[Token, ASTNode]]: + ordered_operator: OrderedOperator, output_queue: list[Union[Token, ASTNode]] + ) -> list[Union[Token, ASTNode]]: operator, token, index = ordered_operator if operator.fixity is Operator.Fixity.INFIX: diff --git a/formulaic/parser/parser.py b/formulaic/parser/parser.py index 945779cb..c87dd4c8 100644 --- a/formulaic/parser/parser.py +++ b/formulaic/parser/parser.py @@ -4,17 +4,11 @@ import functools import itertools import re +from collections.abc import Generator, Iterable, Mapping, MutableMapping from dataclasses import dataclass, field from enum import Flag, auto from typing import ( Any, - Generator, - Iterable, - List, - Mapping, - MutableMapping, - Set, - Tuple, Union, cast, ) @@ -82,7 +76,7 @@ class FeatureFlags(Flag): @classmethod def from_spec( - cls, flags: Union[DefaultFormulaParser.FeatureFlags, Set[str]] + cls, flags: Union[DefaultFormulaParser.FeatureFlags, set[str]] ) -> DefaultFormulaParser.FeatureFlags: if isinstance(flags, DefaultFormulaParser.FeatureFlags): return flags @@ -109,7 +103,7 @@ def __post_init__(self) -> None: self.operator_resolver.set_feature_flags(self.feature_flags) def set_feature_flags( - self, flags: DefaultFormulaParser.FeatureFlags | Set[str] + self, flags: DefaultFormulaParser.FeatureFlags | set[str] ) -> Self: self.feature_flags = DefaultFormulaParser.FeatureFlags.from_spec(flags) self.__post_init__() @@ -159,7 +153,7 @@ def get_tokens_from_formula( ) ) - def find_rhs_index(tokens: List[Token]) -> int: + def find_rhs_index(tokens: list[Token]) -> int: """ Find the top-level index of the tilde operator starting the right hand side of the formula (or -1 if not found). @@ -310,7 +304,7 @@ def __post_init__(self) -> None: ) def set_feature_flags( - self, flags: DefaultFormulaParser.FeatureFlags | Set[str] + self, flags: DefaultFormulaParser.FeatureFlags | set[str] ) -> Self: self.feature_flags = DefaultFormulaParser.FeatureFlags.from_spec(flags) if "operator_table" in self.__dict__: @@ -318,10 +312,10 @@ def set_feature_flags( return self @property - def operators(self) -> List[Operator]: + def operators(self) -> list[Operator]: def formula_part_expansion( lhs: OrderedSet[Term], rhs: OrderedSet[Term] - ) -> Tuple[OrderedSet[Term], ...]: + ) -> tuple[OrderedSet[Term], ...]: terms = (lhs, rhs) out = [] @@ -360,7 +354,7 @@ def power(arg: OrderedSet[Term], power: OrderedSet[Term]) -> OrderedSet[Term]: def multistage_formula( lhs: OrderedSet[Term], rhs: OrderedSet[Term] ) -> Structured[OrderedSet[Term]]: - def get_terms(terms: OrderedSet[Term]) -> List[Term]: + def get_terms(terms: OrderedSet[Term]) -> list[Term]: return [ Term( factors=[Factor(str(t) + "_hat", eval_method="lookup")], @@ -378,7 +372,7 @@ def get_terms(terms: OrderedSet[Term]) -> List[Term]: def insert_unused_terms(context: Mapping[str, Any]) -> OrderedSet[Term]: available_variables: OrderedSet[str] - used_variables: Set[str] = set(context["__formulaic_variables_used_lhs__"]) + used_variables: set[str] = set(context["__formulaic_variables_used_lhs__"]) # Populate `available_variables` or raise. if "__formulaic_variables_available__" in context: @@ -540,7 +534,7 @@ def insert_unused_terms(context: Mapping[str, Any]) -> OrderedSet[Term]: def resolve( self, token: Token, - ) -> Generator[Tuple[Token, Iterable[Operator]], None, None]: + ) -> Generator[tuple[Token, Iterable[Operator]], None, None]: if token.token in self.operator_table: yield from super().resolve(token) return diff --git a/formulaic/parser/types/ast_node.py b/formulaic/parser/types/ast_node.py index e4f11a06..6299a0c0 100644 --- a/formulaic/parser/types/ast_node.py +++ b/formulaic/parser/types/ast_node.py @@ -2,15 +2,11 @@ import functools import graphlib +from collections.abc import Iterable, Mapping from typing import ( Any, - Dict, Generic, - Iterable, - List, - Mapping, Optional, - Tuple, TypeVar, Union, ) @@ -44,7 +40,7 @@ def __init__(self, operator: Operator, args: Iterable[Any]): def to_terms( self, *, context: Optional[Mapping[str, Any]] = None - ) -> Union[OrderedSet[Term], Structured[OrderedSet[Term]], Tuple]: + ) -> Union[OrderedSet[Term], Structured[OrderedSet[Term]], tuple]: """ Evaluate this AST node and return the resulting set of `Term` instances. @@ -59,7 +55,7 @@ def to_terms( g = graphlib.TopologicalSorter(self.__generate_evaluation_graph()) g.prepare() - results: Dict[ASTNode, Any] = {} + results: dict[ASTNode, Any] = {} while g.is_active(): for node in g.get_ready(): @@ -86,7 +82,7 @@ def __repr__(self) -> str: except RecursionError: return f"" - def flatten(self, str_args: bool = False) -> List[Any]: + def flatten(self, str_args: bool = False) -> list[Any]: """ Flatten this `ASTNode` instance into a list of form: [, *]. @@ -111,7 +107,7 @@ def flatten(self, str_args: bool = False) -> List[Any]: # Helpers - def __generate_evaluation_graph(self) -> Dict[ASTNode, List[ASTNode]]: + def __generate_evaluation_graph(self) -> dict[ASTNode, list[ASTNode]]: nodes_to_parse = [self] graph = {} while nodes_to_parse: diff --git a/formulaic/parser/types/factor.py b/formulaic/parser/types/factor.py index dc4ed4a4..1a5e619a 100644 --- a/formulaic/parser/types/factor.py +++ b/formulaic/parser/types/factor.py @@ -1,7 +1,8 @@ from __future__ import annotations +from collections.abc import Mapping from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from .ordered_set import OrderedSet from .term import Term @@ -51,7 +52,7 @@ def __init__( *, eval_method: Optional[Union[str, EvalMethod]] = None, kind: Optional[Union[str, Kind]] = None, - metadata: Optional[Dict] = None, + metadata: Optional[dict] = None, token: Optional[Token] = None, ): self.expr = expr diff --git a/formulaic/parser/types/formula_parser.py b/formulaic/parser/types/formula_parser.py index c38221ac..2197672b 100644 --- a/formulaic/parser/types/formula_parser.py +++ b/formulaic/parser/types/formula_parser.py @@ -1,15 +1,12 @@ from __future__ import annotations +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import dataclass from enum import IntEnum from typing import ( Any, - Iterable, Literal, - Mapping, - MutableMapping, Optional, - Tuple, Union, overload, ) @@ -180,7 +177,7 @@ def get_terms_from_ast( return Structured([]) terms: Union[ - OrderedSet[Term], Tuple[OrderedSet[Term]], Structured[OrderedSet[Term]] + OrderedSet[Term], tuple[OrderedSet[Term]], Structured[OrderedSet[Term]] ] = ast.to_terms(context=context) if not isinstance(terms, Structured): terms = Structured[OrderedSet[Term]](terms) diff --git a/formulaic/parser/types/operator.py b/formulaic/parser/types/operator.py index 844bc4c0..9a465f66 100644 --- a/formulaic/parser/types/operator.py +++ b/formulaic/parser/types/operator.py @@ -1,8 +1,9 @@ from __future__ import annotations import inspect +from collections.abc import Mapping from enum import Enum -from typing import Any, Callable, List, Mapping, Optional, Union +from typing import Any, Callable, Optional, Union from .token import Token @@ -61,7 +62,7 @@ def __init__( fixity: Union[str, Fixity] = Fixity.INFIX, to_terms: Optional[Callable[..., Any]] = None, accepts_context: Optional[ - Callable[[List[Union[Token, Operator]]], bool] + Callable[[list[Union[Token, Operator]]], bool] ] = None, structural: bool = False, disabled: bool = False, @@ -99,7 +100,7 @@ def to_terms(self, *args: Any, context: Optional[Mapping[str, Any]] = None) -> A return self._to_terms(*args, context=context or {}) return self._to_terms(*args) - def accepts_context(self, context: List[Union[Token, Operator]]) -> bool: + def accepts_context(self, context: list[Union[Token, Operator]]) -> bool: if self._accepts_context: # We only need to pass on tokens and operators with precedence less # than or equal to ourselves, since all other operators will be diff --git a/formulaic/parser/types/operator_resolver.py b/formulaic/parser/types/operator_resolver.py index 787ce616..9e3a09c5 100644 --- a/formulaic/parser/types/operator_resolver.py +++ b/formulaic/parser/types/operator_resolver.py @@ -1,7 +1,7 @@ import abc from collections import defaultdict +from collections.abc import Generator, Iterable from functools import cached_property -from typing import Dict, Generator, Iterable, List, Tuple from ..utils import exc_for_token from .operator import Operator @@ -28,14 +28,14 @@ class OperatorResolver(metaclass=abc.ABCMeta): @property @abc.abstractmethod - def operators(self) -> List[Operator]: + def operators(self) -> list[Operator]: """ The `Operator` instance pool which can be matched to tokens by `.resolve()`. """ @cached_property - def operator_table(self) -> Dict[str, List[Operator]]: + def operator_table(self) -> dict[str, list[Operator]]: operator_table = defaultdict(list) for operator in self.operators: operator_table[operator.symbol].append(operator) @@ -49,7 +49,7 @@ def operator_table(self) -> Dict[str, List[Operator]]: def resolve( self, token: Token - ) -> Generator[Tuple[Token, Iterable[Operator]], None, None]: + ) -> Generator[tuple[Token, Iterable[Operator]], None, None]: """ Generate the sets of operator candidates that may be viable for the given token (which may include multiple adjacent operators concatenated @@ -73,7 +73,7 @@ def _resolve( self, token: Token, symbol: str, - ) -> Tuple[Token, Iterable[Operator]]: + ) -> tuple[Token, Iterable[Operator]]: """ The default operator resolving logic. """ @@ -82,5 +82,5 @@ def _resolve( return token, self.operator_table[symbol] # The operator table cache may not be pickleable, so let's drop it. - def __getstate__(self) -> Dict: + def __getstate__(self) -> dict: return {} diff --git a/formulaic/parser/types/ordered_set.py b/formulaic/parser/types/ordered_set.py index f961e80f..fe246f12 100644 --- a/formulaic/parser/types/ordered_set.py +++ b/formulaic/parser/types/ordered_set.py @@ -1,7 +1,7 @@ from __future__ import annotations -from collections.abc import Set -from typing import Any, Generic, Iterable, Iterator, TypeVar +from collections.abc import Iterable, Iterator, Set +from typing import Any, Generic, TypeVar ItemType = TypeVar("ItemType") diff --git a/formulaic/parser/types/term.py b/formulaic/parser/types/term.py index a409c699..08eed272 100644 --- a/formulaic/parser/types/term.py +++ b/formulaic/parser/types/term.py @@ -1,7 +1,8 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import TYPE_CHECKING, Any, Optional from .ordered_set import OrderedSet @@ -24,7 +25,7 @@ class Term: FACTOR_MATCHER = re.compile(r"(?:^|(?<=:))(`?)(?P[^`]+?)\1(?=:|$)") - def __init__(self, factors: Iterable["Factor"], origin: Optional[Term] = None): + def __init__(self, factors: Iterable[Factor], origin: Optional[Term] = None): self.factors = tuple(dict.fromkeys(factors)) self.origin = origin self._factor_key = tuple(factor.expr for factor in sorted(self.factors)) diff --git a/formulaic/parser/types/token.py b/formulaic/parser/types/token.py index 331d0e5a..4afd6a97 100644 --- a/formulaic/parser/types/token.py +++ b/formulaic/parser/types/token.py @@ -2,8 +2,9 @@ import copy import re +from collections.abc import Iterable, Mapping from enum import Enum -from typing import Any, Iterable, Mapping, Optional, Set, Tuple, Union +from typing import Any, Optional, Union from formulaic.utils.variables import Variable, get_expression_variables @@ -76,7 +77,7 @@ def kind(self, kind: Optional[Union[str, Kind]]) -> None: def update( self, char: str, source_index: int, kind: Union[None, str, Kind] = None - ) -> "Token": + ) -> Token: """ Add a character to the token string, keeping track of the source indices. @@ -117,7 +118,7 @@ def __lt__(self, other: Any) -> bool: return NotImplemented @property - def source_loc(self) -> Tuple[Optional[int], Optional[int]]: + def source_loc(self) -> tuple[Optional[int], Optional[int]]: """ The indices of the first and last character represented by this token in the source string. @@ -182,7 +183,7 @@ def get_source_context(self, colorize: bool = False) -> Optional[str]: return f"{self.source[:self.source_start]}⧛{self.source[self.source_start:self.source_end+1]}⧚{self.source[self.source_end+1:]}" @property - def required_variables(self) -> Set[Variable]: + def required_variables(self) -> set[Variable]: """ The set of variables required to evaluate this token. @@ -255,7 +256,7 @@ def split( last_index = 0 separators = pattern.finditer(self.token) - def get_next_token(next_index: int) -> Tuple[int, Token]: + def get_next_token(next_index: int) -> tuple[int, Token]: return next_index, self.copy_with_attrs( token=self.token[last_index:next_index] ) diff --git a/formulaic/parser/utils.py b/formulaic/parser/utils.py index e000454a..60ae0cb6 100644 --- a/formulaic/parser/utils.py +++ b/formulaic/parser/utils.py @@ -1,5 +1,6 @@ import re -from typing import Iterable, Optional, Sequence, Set, Tuple, Type, Union +from collections.abc import Iterable, Sequence +from typing import Optional, Union from formulaic.errors import FormulaSyntaxError @@ -12,7 +13,7 @@ def exc_for_token( token: Union[Token, ASTNode], message: str, - errcls: Type[Exception] = FormulaSyntaxError, + errcls: type[Exception] = FormulaSyntaxError, ) -> Exception: """ Return an exception ready to be raised with a helpful token/source context. @@ -33,7 +34,7 @@ def exc_for_token( def exc_for_missing_operator( lhs: Union[Token, ASTNode], rhs: Union[Token, ASTNode], - errcls: Type[Exception] = FormulaSyntaxError, + errcls: type[Exception] = FormulaSyntaxError, extra: Optional[str] = None, ) -> Exception: """ @@ -84,7 +85,7 @@ def __get_token_for_ast(ast: Union[Token, ASTNode]) -> Token: # pragma: no cove def __get_tokens_for_gap( lhs: Union[Token, ASTNode], rhs: Union[Token, ASTNode] -) -> Tuple[Token, Token, Token]: +) -> tuple[Token, Token, Token]: """ Ensure that incoming `lhs` and `rhs` objects are `Token`s, or else generate some for debugging purposes (note that these tokens will not be valid @@ -166,7 +167,7 @@ def insert_tokens_after( *, kind: Optional[Token.Kind] = None, join_operator: Optional[str] = None, - no_join_for_operators: Union[bool, Set[str]] = True, + no_join_for_operators: Union[bool, set[str]] = True, ) -> Iterable[Token]: """ Insert additional tokens into a sequence of tokens after (within token) @@ -232,7 +233,7 @@ def insert_tokens_after( def merge_operator_tokens( - tokens: Iterable[Token], symbols: Optional[Set[str]] = None + tokens: Iterable[Token], symbols: Optional[set[str]] = None ) -> Iterable[Token]: """ Merge operator tokens within a sequence of tokens. diff --git a/formulaic/sugar.py b/formulaic/sugar.py index 09201434..9440df32 100644 --- a/formulaic/sugar.py +++ b/formulaic/sugar.py @@ -1,4 +1,5 @@ -from typing import Any, Mapping, Optional, Set, Union +from collections.abc import Mapping +from typing import Any, Optional, Union from .formula import FormulaSpec from .model_matrix import ModelMatrices, ModelMatrix @@ -11,7 +12,7 @@ def model_matrix( data: Any, *, context: Union[int, Mapping[str, Any]] = 0, - drop_rows: Optional[Set[int]] = None, + drop_rows: Optional[set[int]] = None, **spec_overrides: Any, ) -> Union[ModelMatrix, ModelMatrices]: """ diff --git a/formulaic/transforms/basis_spline.py b/formulaic/transforms/basis_spline.py index d9d37e6a..9e22b595 100644 --- a/formulaic/transforms/basis_spline.py +++ b/formulaic/transforms/basis_spline.py @@ -1,6 +1,7 @@ from collections import defaultdict +from collections.abc import Iterable from enum import Enum -from typing import Dict, Iterable, List, Optional, Union, cast +from typing import Optional, Union, cast import numpy import pandas @@ -157,7 +158,7 @@ def basis_spline( # pylint: disable=dangerous-default-value # always replaced f"{upper_bound}), no data points are available for knot selection." ) knots = cast( - List[float], + list[float], numpy.nanquantile(knots_x, numpy.linspace(0, 1, nknots + 2))[ 1:-1 ].tolist(), @@ -172,7 +173,7 @@ def basis_spline( # pylint: disable=dangerous-default-value # always replaced # The following code is equivalent to [B(i, j=degree) for in range(len(knots)-d-1)], with B(i, j) as defined below. # B = lambda i, j: ((x >= knots[i]) & (x < knots[i+1])).astype(float) if j == 0 else alpha(i, j, x) * B(i, j-1, x) + (1 - alpha(i+1, j, x)) * B(i+1, j-1, x) # We don't directly use this recurrence relation so that we can memoise the B(i, j). - cache: Dict[int, Dict[int, float]] = defaultdict(dict) + cache: dict[int, dict[int, float]] = defaultdict(dict) alpha = ( lambda i, j: (x - knots[i]) / (knots[i + j] - knots[i]) if knots[i + j] != knots[i] diff --git a/formulaic/transforms/contrasts.py b/formulaic/transforms/contrasts.py index ffa3bb51..d057e83f 100644 --- a/formulaic/transforms/contrasts.py +++ b/formulaic/transforms/contrasts.py @@ -3,18 +3,13 @@ import inspect import warnings from abc import abstractmethod +from collections.abc import Hashable, Iterable, Sequence from dataclasses import dataclass from numbers import Number from typing import ( TYPE_CHECKING, Any, - Dict, - Hashable, - Iterable, - List, Optional, - Sequence, - Tuple, Union, cast, ) @@ -40,7 +35,7 @@ def C( data: Any, contrasts: Optional[ - Union[Contrasts, Dict[str, Iterable[Number]], numpy.ndarray] + Union[Contrasts, dict[str, Iterable[Number]], numpy.ndarray] ] = None, *, levels: Optional[Iterable[str]] = None, @@ -70,8 +65,8 @@ def C( def encoder( values: Any, reduced_rank: bool, - drop_rows: List[int], - encoder_state: Dict[str, Any], + drop_rows: list[int], + encoder_state: dict[str, Any], model_spec: ModelSpec, ) -> FactorValues: values = pandas.Series(values) @@ -98,7 +93,7 @@ def encode_contrasts( # pylint: disable=dangerous-default-value # always repla data: Any, contrasts: Union[ Contrasts, - Dict[Hashable, Sequence[float]], + dict[Hashable, Sequence[float]], Sequence[Sequence[float]], numpy.ndarray, None, @@ -107,7 +102,7 @@ def encode_contrasts( # pylint: disable=dangerous-default-value # always repla levels: Optional[Iterable[str]] = None, reduced_rank: bool = False, output: Optional[str] = None, - _state: Dict[str, Any] = {}, + _state: dict[str, Any] = {}, _spec: Optional[ModelSpec] = None, ) -> FactorValues[Union[pandas.DataFrame, spsparse.spmatrix]]: """ @@ -143,7 +138,7 @@ def encode_contrasts( # pylint: disable=dangerous-default-value # always repla contrasts = CustomContrasts( cast( Union[ - Dict[Hashable, Sequence[float]], + dict[Hashable, Sequence[float]], Sequence[Sequence[float]], numpy.ndarray, ], @@ -257,7 +252,7 @@ def apply( return FactorValues( encoded, kind="categorical", - column_names=cast(Tuple[Hashable], ()), + column_names=cast(tuple[Hashable], ()), spans_intercept=False, format=self.get_factor_format(levels, reduced_rank=reduced_rank), format_reduced=self.get_factor_format(levels, reduced_rank=True), @@ -846,7 +841,7 @@ class CustomContrasts(Contrasts): def __init__( self, contrasts: Union[ - Dict[Hashable, Sequence[float]], Sequence[Sequence[float]], numpy.ndarray + dict[Hashable, Sequence[float]], Sequence[Sequence[float]], numpy.ndarray ], names: Optional[Sequence[Hashable]] = None, ): diff --git a/formulaic/transforms/cubic_spline.py b/formulaic/transforms/cubic_spline.py index b3e28194..476d7c5e 100644 --- a/formulaic/transforms/cubic_spline.py +++ b/formulaic/transforms/cubic_spline.py @@ -32,8 +32,9 @@ from __future__ import annotations +from collections.abc import Iterable from functools import partial -from typing import Iterable, Literal, cast +from typing import Literal, cast import numpy import pandas diff --git a/formulaic/transforms/hashed.py b/formulaic/transforms/hashed.py index c1aec07d..5c35eba7 100644 --- a/formulaic/transforms/hashed.py +++ b/formulaic/transforms/hashed.py @@ -1,9 +1,9 @@ from __future__ import annotations -import sys +from collections.abc import Iterable from hashlib import md5 from numbers import Number -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Optional, Union import numpy as np @@ -16,10 +16,7 @@ def md5_to_int(s: str) -> int: # pragma: no cover; branched code - if sys.version_info >= (3, 9): - hashed = md5(s.encode(), usedforsecurity=False) - else: - hashed = md5(s.encode()) # noqa: S324 ; use of insecure hash function + hashed = md5(s.encode(), usedforsecurity=False) # noqa: S324 ; use of insecure hash function return int(hashed.hexdigest(), 16) @@ -27,7 +24,7 @@ def hashed( data: Any, levels: int, contrasts: Optional[ - Union[Contrasts, Dict[str, Iterable[Number]], np.ndarray] + Union[Contrasts, dict[str, Iterable[Number]], np.ndarray] ] = None, *, hash_func: Callable[[str], int] = md5_to_int, @@ -56,8 +53,8 @@ def hashed( def encoder( values: Any, reduced_rank: bool, - drop_rows: List[int], - encoder_state: Dict[str, Any], + drop_rows: list[int], + encoder_state: dict[str, Any], model_spec: ModelSpec, ) -> FactorValues: values = np.array(values) diff --git a/formulaic/transforms/patsy_compat.py b/formulaic/transforms/patsy_compat.py index e5bb5191..ce8b7174 100644 --- a/formulaic/transforms/patsy_compat.py +++ b/formulaic/transforms/patsy_compat.py @@ -1,4 +1,5 @@ -from typing import Any, Dict, Mapping, Optional +from collections.abc import Mapping +from typing import Any, Optional from formulaic.utils.stateful_transforms import stateful_transform @@ -19,7 +20,7 @@ def standardize( center: bool = True, rescale: bool = True, ddof: int = 0, - _state: Optional[Dict[str, Any]] = None, + _state: Optional[dict[str, Any]] = None, ) -> Any: return scale(x, center=center, scale=rescale, ddof=ddof, _state=_state) diff --git a/formulaic/utils/calculus.py b/formulaic/utils/calculus.py index 8f7e1a57..b245c2d3 100644 --- a/formulaic/utils/calculus.py +++ b/formulaic/utils/calculus.py @@ -1,4 +1,5 @@ -from typing import Iterable, Set, cast +from collections.abc import Iterable +from typing import cast from formulaic.parser.types import Factor, Term from formulaic.parser.types.ordered_set import OrderedSet @@ -48,7 +49,7 @@ def differentiate_term( return Term(factors or {Factor("1", eval_method="literal")}) -def _factor_symbols(factor: Factor, use_sympy: bool = False) -> Set[str]: +def _factor_symbols(factor: Factor, use_sympy: bool = False) -> set[str]: """ Extract the symbols represented in a factor. @@ -73,8 +74,8 @@ def _factor_symbols(factor: Factor, use_sympy: bool = False) -> Set[str]: def _differentiate_factors( - factors: Set[Factor], var: str, use_sympy: bool = False -) -> Set[Factor]: + factors: set[Factor], var: str, use_sympy: bool = False +) -> set[Factor]: """ Differentiate the nominated `factors` by `var`. diff --git a/formulaic/utils/cast.py b/formulaic/utils/cast.py index c56f247e..709482ef 100644 --- a/formulaic/utils/cast.py +++ b/formulaic/utils/cast.py @@ -1,5 +1,6 @@ +from collections.abc import Hashable from functools import singledispatch, wraps -from typing import Any, Callable, Dict, Hashable, Union +from typing import Any, Callable, Union import numpy import pandas @@ -34,13 +35,13 @@ def as_columns(data: Any) -> Any: @as_columns.register @propagate_metadata -def _(data: pandas.DataFrame) -> Dict[Hashable, pandas.Series]: +def _(data: pandas.DataFrame) -> dict[Hashable, pandas.Series]: return dict(data.items()) @as_columns.register @propagate_metadata -def _(data: numpy.ndarray) -> Union[numpy.ndarray, Dict[Hashable, numpy.ndarray]]: +def _(data: numpy.ndarray) -> Union[numpy.ndarray, dict[Hashable, numpy.ndarray]]: if len(data.shape) == 1: return data if len(data.shape) > 2: @@ -60,7 +61,7 @@ def _(data: numpy.ndarray) -> Union[numpy.ndarray, Dict[Hashable, numpy.ndarray] @as_columns.register @propagate_metadata -def _(data: scipy.sparse.csc_matrix) -> Dict[Hashable, scipy.sparse.spmatrix]: +def _(data: scipy.sparse.csc_matrix) -> dict[Hashable, scipy.sparse.spmatrix]: if ( hasattr(data, "__formulaic_metadata__") and data.__formulaic_metadata__.column_names diff --git a/formulaic/utils/code.py b/formulaic/utils/code.py index 5df55581..6b4f2294 100644 --- a/formulaic/utils/code.py +++ b/formulaic/utils/code.py @@ -1,7 +1,8 @@ import ast import keyword import re -from typing import MutableMapping, Union +from collections.abc import MutableMapping +from typing import Union import numpy diff --git a/formulaic/utils/constraints.py b/formulaic/utils/constraints.py index f00f1d48..7766ef0b 100644 --- a/formulaic/utils/constraints.py +++ b/formulaic/utils/constraints.py @@ -3,18 +3,12 @@ import ast import functools import itertools +from collections.abc import Iterable, Mapping, Sequence from numbers import Number from typing import ( Any, - Dict, - Iterable, - List, Literal, - Mapping, Optional, - Sequence, - Set, - Tuple, Union, cast, ) @@ -34,9 +28,9 @@ LinearConstraintSpec = Union[ str, - List[str], - Dict[str, Number], - Tuple["numpy.typing.ArrayLike", "numpy.typing.ArrayLike"], + list[str], + dict[str, Number], + tuple["numpy.typing.ArrayLike", "numpy.typing.ArrayLike"], "numpy.typing.ArrayLike", ] @@ -120,8 +114,8 @@ def from_spec( def __init__( self, - constraint_matrix: "numpy.typing.ArrayLike", - constraint_values: "numpy.typing.ArrayLike", + constraint_matrix: numpy.typing.ArrayLike, + constraint_values: numpy.typing.ArrayLike, variable_names: Optional[Sequence[str]] = None, ): """ @@ -246,7 +240,7 @@ def get_ast(self, formula: str) -> Optional[ASTNode]: def get_terms( self, formula: str - ) -> Union[None, List[ScaledFactor], Tuple[List[ScaledFactor], ...]]: + ) -> Union[None, list[ScaledFactor], tuple[list[ScaledFactor], ...]]: """ Build the `ScaledFactor` instances for a constraint formula string. @@ -257,13 +251,13 @@ def get_terms( if not ast: return None return cast( - Union[None, List[ScaledFactor], Tuple[List[ScaledFactor], ...]], + Union[None, list[ScaledFactor], tuple[list[ScaledFactor], ...]], ast.to_terms(), ) def get_matrix( self, formula: str - ) -> Tuple["numpy.typing.ArrayLike", "numpy.typing.ArrayLike"]: + ) -> tuple[numpy.typing.ArrayLike, numpy.typing.ArrayLike]: """ Build the constraint matrix and constraint values vector associated with the parsed string. @@ -323,7 +317,7 @@ def for_token(cls, token: Token) -> ConstraintToken: def to_terms( # type: ignore[override] self, *, context: Optional[Mapping[str, Any]] = None - ) -> Set[ScaledFactor]: + ) -> set[ScaledFactor]: if self.kind is Token.Kind.VALUE: factor = ast.literal_eval(self.token) if isinstance(factor, (int, float)): @@ -384,8 +378,8 @@ class ConstraintOperatorResolver(OperatorResolver): # pylint: disable=unnecessa """ @property - def operators(self) -> List[Operator]: - def join_tuples(lhs: Any, rhs: Any) -> Tuple: + def operators(self) -> list[Operator]: + def join_tuples(lhs: Any, rhs: Any) -> tuple: if not isinstance(lhs, tuple): lhs = (lhs,) if not isinstance(rhs, tuple): @@ -393,8 +387,8 @@ def join_tuples(lhs: Any, rhs: Any) -> Tuple: return lhs + rhs def add_terms( - terms_left: Set[ScaledFactor], terms_right: Set[ScaledFactor] - ) -> Set[ScaledFactor]: + terms_left: set[ScaledFactor], terms_right: set[ScaledFactor] + ) -> set[ScaledFactor]: terms_left = {term: term for term in terms_left} terms_right = {term: term for term in terms_right} @@ -409,8 +403,8 @@ def add_terms( return added def sub_terms( - terms_left: Set[ScaledFactor], terms_right: Set[ScaledFactor] - ) -> Set[ScaledFactor]: + terms_left: set[ScaledFactor], terms_right: set[ScaledFactor] + ) -> set[ScaledFactor]: terms_left = {term: term for term in terms_left} terms_right = {term: term for term in terms_right} @@ -426,16 +420,16 @@ def sub_terms( return added - def negate_terms(terms: Set[ScaledFactor]) -> Set[ScaledFactor]: + def negate_terms(terms: set[ScaledFactor]) -> set[ScaledFactor]: return {-term for term in terms} def mul_terms( - terms_left: Set[ScaledFactor], terms_right: Set[ScaledFactor] - ) -> Set[ScaledFactor]: + terms_left: set[ScaledFactor], terms_right: set[ScaledFactor] + ) -> set[ScaledFactor]: terms_left = {term: term for term in terms_left} terms_right = {term: term for term in terms_right} - terms: Set[ScaledFactor] = set() + terms: set[ScaledFactor] = set() for term_left, term_right in itertools.product(terms_left, terms_right): terms = add_terms(terms, {mul_term(term_left, term_right)}) @@ -456,12 +450,12 @@ def mul_term(term_left: ScaledFactor, term_right: ScaledFactor) -> ScaledFactor: ) def div_terms( - terms_left: Set[ScaledFactor], terms_right: Set[ScaledFactor] - ) -> Set[ScaledFactor]: + terms_left: set[ScaledFactor], terms_right: set[ScaledFactor] + ) -> set[ScaledFactor]: terms_left = {term: term for term in terms_left} terms_right = {term: term for term in terms_right} - terms: Set[ScaledFactor] = set() + terms: set[ScaledFactor] = set() for term_left, term_right in itertools.product(terms_left, terms_right): terms = add_terms(terms, {div_term(term_left, term_right)}) diff --git a/formulaic/utils/context.py b/formulaic/utils/context.py index 27846650..4b2b9b60 100644 --- a/formulaic/utils/context.py +++ b/formulaic/utils/context.py @@ -1,5 +1,6 @@ import sys -from typing import Any, Mapping, Optional, Union +from collections.abc import Mapping +from typing import Any, Optional, Union from .layered_mapping import LayeredMapping diff --git a/formulaic/utils/deprecations.py b/formulaic/utils/deprecations.py index 9267c4d5..6b67c089 100644 --- a/formulaic/utils/deprecations.py +++ b/formulaic/utils/deprecations.py @@ -1,14 +1,14 @@ import functools import warnings -from typing import Callable, Optional, Tuple +from typing import Callable, Optional def deprecated( func: Optional[Callable] = None, *, message: Optional[str] = None, - as_of: Optional[Tuple[int, ...]] = None, - removed_in: Optional[Tuple[int, ...]] = None, + as_of: Optional[tuple[int, ...]] = None, + removed_in: Optional[tuple[int, ...]] = None, ) -> Callable: if func is None: return functools.partial( diff --git a/formulaic/utils/iterators.py b/formulaic/utils/iterators.py index 26ac54d2..4d7cb166 100644 --- a/formulaic/utils/iterators.py +++ b/formulaic/utils/iterators.py @@ -1,4 +1,5 @@ -from typing import Any, Iterable, Iterator, List +from collections.abc import Iterable, Iterator +from typing import Any from .sentinels import MISSING @@ -10,7 +11,7 @@ class peekable_iter: def __init__(self, it: Iterable): self._it = iter(it) - self._next: List[Any] = [] + self._next: list[Any] = [] def __iter__(self) -> Iterator: return self diff --git a/formulaic/utils/layered_mapping.py b/formulaic/utils/layered_mapping.py index 8ae473c1..49fbb598 100644 --- a/formulaic/utils/layered_mapping.py +++ b/formulaic/utils/layered_mapping.py @@ -1,9 +1,9 @@ from __future__ import annotations import itertools -from collections.abc import MutableMapping +from collections.abc import Iterable, Iterator, Mapping, MutableMapping from functools import cached_property -from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple +from typing import Any, Optional class LayeredMapping(MutableMapping): @@ -24,11 +24,11 @@ def __init__(self, *layers: Optional[Mapping], name: Optional[str] = None): layers. """ self.name = name - self._mutations: Dict = {} - self._layers: List[Mapping] = self.__filter_layers(layers) + self._mutations: dict = {} + self._layers: list[Mapping] = self.__filter_layers(layers) @staticmethod - def __filter_layers(layers: Iterable[Optional[Mapping]]) -> List[Mapping]: + def __filter_layers(layers: Iterable[Optional[Mapping]]) -> list[Mapping]: """ Filter incoming `layers` down to those which are not null. """ @@ -100,7 +100,7 @@ def with_layers( # Named layer lookups and caching @cached_property - def named_layers(self) -> Dict[str, LayeredMapping]: + def named_layers(self) -> dict[str, LayeredMapping]: """ A mapping from string names to named `LayeredMapping` instances. If no children mappings are named, this will be an empty dictionary. If more @@ -119,8 +119,8 @@ def named_layers(self) -> Dict[str, LayeredMapping]: return named_layers def get_with_layer_name( - self, key: Any, default: Any = None, *, _path: Tuple[str, ...] = () - ) -> Tuple[Any, Optional[str]]: + self, key: Any, default: Any = None, *, _path: tuple[str, ...] = () + ) -> tuple[Any, Optional[str]]: """ Return the value for the nominated `key` (or `default` if `key` is not in this mapping); and the name of the layer from which the value is diff --git a/formulaic/utils/null_handling.py b/formulaic/utils/null_handling.py index 2c5e2b4e..1d68c773 100644 --- a/formulaic/utils/null_handling.py +++ b/formulaic/utils/null_handling.py @@ -1,5 +1,6 @@ +from collections.abc import Sequence from functools import singledispatch -from typing import Any, Sequence, Set, Union +from typing import Any, Union import numpy import pandas @@ -9,7 +10,7 @@ @singledispatch -def find_nulls(values: Any) -> Set[int]: +def find_nulls(values: Any) -> set[int]: """ Find the indices of rows in `values` that have null/nan values. @@ -22,27 +23,27 @@ def find_nulls(values: Any) -> Set[int]: @find_nulls.register -def _(values: None) -> Set[int]: +def _(values: None) -> set[int]: # Literal `None` values have special meaning and are checked elsewhere. return set() @find_nulls.register -def _(values: str) -> Set[int]: +def _(values: str) -> set[int]: return set() @find_nulls.register -def _(values: int) -> Set[int]: +def _(values: int) -> set[int]: return _drop_nulls_scalar(values) @find_nulls.register -def _(values: float) -> Set[int]: +def _(values: float) -> set[int]: return _drop_nulls_scalar(values) -def _drop_nulls_scalar(values: Union[int, float]) -> Set[int]: +def _drop_nulls_scalar(values: Union[int, float]) -> set[int]: if isinstance(values, FactorValues): values = values.__wrapped__ if numpy.isnan(values): @@ -51,7 +52,7 @@ def _drop_nulls_scalar(values: Union[int, float]) -> Set[int]: @find_nulls.register -def _(values: list) -> Set[int]: +def _(values: list) -> set[int]: if isinstance(values, FactorValues): # Older versions of pandas (<1.2) cannot unpack this automatically. values = values.__wrapped__ @@ -59,7 +60,7 @@ def _(values: list) -> Set[int]: @find_nulls.register -def _(values: dict) -> Set[int]: +def _(values: dict) -> set[int]: indices = set() for vs in values.values(): indices.update(find_nulls(vs)) @@ -67,12 +68,12 @@ def _(values: dict) -> Set[int]: @find_nulls.register -def _(values: pandas.Series) -> Set[int]: +def _(values: pandas.Series) -> set[int]: return set(numpy.flatnonzero(values.isnull().values)) @find_nulls.register -def _(values: numpy.ndarray) -> Set[int]: +def _(values: numpy.ndarray) -> set[int]: if len(values.shape) == 0: if numpy.isnan(values): raise ValueError("Constant value is null, invalidating all rows.") @@ -90,7 +91,7 @@ def _(values: numpy.ndarray) -> Set[int]: @find_nulls.register -def _(values: spsparse.spmatrix) -> Set[int]: +def _(values: spsparse.spmatrix) -> set[int]: rows, _, data = spsparse.find(values) null_data_indices = numpy.flatnonzero(numpy.isnan(data)) return set(rows[null_data_indices]) diff --git a/formulaic/utils/sparse.py b/formulaic/utils/sparse.py index bc5a52c2..20925c61 100644 --- a/formulaic/utils/sparse.py +++ b/formulaic/utils/sparse.py @@ -1,4 +1,5 @@ -from typing import Iterable, List, Optional, Tuple +from collections.abc import Iterable +from typing import Optional import numpy import pandas @@ -7,7 +8,7 @@ def categorical_encode_series_to_sparse_csc_matrix( series: Iterable, levels: Optional[Iterable[str]] = None, drop_first: bool = False -) -> Tuple[List, spsparse.csc_matrix]: +) -> tuple[list, spsparse.csc_matrix]: """ Categorically encode (via dummy encoding) a `series` as a sparse matrix. diff --git a/formulaic/utils/stateful_transforms.py b/formulaic/utils/stateful_transforms.py index 8128f743..bbbe5d29 100644 --- a/formulaic/utils/stateful_transforms.py +++ b/formulaic/utils/stateful_transforms.py @@ -1,15 +1,12 @@ import ast import functools import inspect +from collections.abc import Mapping, MutableMapping from typing import ( TYPE_CHECKING, Any, Callable, - Dict, - Mapping, - MutableMapping, Optional, - Set, cast, ) @@ -94,7 +91,7 @@ def stateful_eval( metadata: Optional[Mapping], state: Optional[MutableMapping], spec: Optional["ModelSpec"], - variables: Optional[Set[Variable]] = None, + variables: Optional[set[Variable]] = None, ) -> Any: """ Evaluate an expression in a nominated environment and with a nominated state. @@ -133,7 +130,7 @@ def stateful_eval( # Ensure that variable names in code are valid for Python's interpreter # If not, create new variable in mutable env layer, and update code. - aliases: Dict[str, str] = {} + aliases: dict[str, str] = {} expr = sanitize_variable_names(expr, env, aliases) # Parse Python code @@ -143,7 +140,7 @@ def stateful_eval( variables.update(get_expression_variables(code, env, aliases)) # Extract the nodes of the graph that correspond to stateful transforms - stateful_nodes: Dict[str, ast.Call] = {} + stateful_nodes: dict[str, ast.Call] = {} for node in ast.walk(code): if _is_stateful_transform(node, env): stateful_nodes[format_expr(node)] = cast(ast.Call, node) diff --git a/formulaic/utils/structured.py b/formulaic/utils/structured.py index 48122126..4c1f875e 100644 --- a/formulaic/utils/structured.py +++ b/formulaic/utils/structured.py @@ -3,17 +3,12 @@ import copy import itertools from collections import defaultdict +from collections.abc import Generator, Iterable from typing import ( Any, Callable, - Dict, - Generator, Generic, - Iterable, - List, Optional, - Tuple, - Type, TypeVar, Union, cast, @@ -92,7 +87,7 @@ def __init__( self, root: Any = MISSING, *, - _metadata: Optional[Dict[str, Any]] = None, + _metadata: Optional[dict[str, Any]] = None, **structure: Any, ): if any(key.startswith("_") for key in structure): @@ -150,11 +145,11 @@ def _map( self, func: Union[ Callable[[_ItemType], Any], - Callable[[_ItemType, Tuple[Union[str, int], ...]], Any], + Callable[[_ItemType, tuple[Union[str, int], ...]], Any], ], recurse: bool = True, - as_type: Optional[Type[Structured]] = None, - _context: Tuple[Union[str, int], ...] = (), + as_type: Optional[type[Structured]] = None, + _context: tuple[Union[str, int], ...] = (), ) -> Structured[Any]: """ Map a callable object onto all the structured objects, returning a @@ -178,7 +173,7 @@ def _map( but with all objects transformed under `func`. """ - def apply_func(obj: Any, context: Tuple[Union[str, int], ...]) -> Any: + def apply_func(obj: Any, context: tuple[Union[str, int], ...]) -> Any: if recurse and isinstance(obj, Structured): return obj._map(func, recurse=True, as_type=as_type, _context=context) if isinstance(obj, tuple): @@ -214,7 +209,7 @@ def _flatten(self) -> Generator[_ItemType, None, None]: else: yield value - def _to_dict(self, recurse: bool = True) -> Dict[Optional[str], Any]: + def _to_dict(self, recurse: bool = True) -> dict[Optional[str], Any]: """ Generate a dictionary representation of this structure. @@ -285,8 +280,8 @@ def _simplify( if recurse: def simplify_obj( - obj: Union[_ItemType, Tuple[_ItemType], Structured[_ItemType]], - ) -> Tuple[Union[_ItemType, Tuple[_ItemType], Structured[_ItemType]], bool]: + obj: Union[_ItemType, tuple[_ItemType], Structured[_ItemType]], + ) -> tuple[Union[_ItemType, tuple[_ItemType], Structured[_ItemType]], bool]: """ Return the simplified object, and a flag indicating whether the object was modified. @@ -342,8 +337,8 @@ def _merge( cls, *objects: Any, merger: Optional[Callable] = None, - _context: Tuple[str, ...] = (), - ) -> Union[_ItemType, Structured[_ItemType], Tuple]: + _context: tuple[str, ...] = (), + ) -> Union[_ItemType, Structured[_ItemType], tuple]: """ Merge arbitrarily many objects into a single `Structured` instance. @@ -407,7 +402,7 @@ def _merge( return cls( **cast( - Dict[str, Any], + dict[str, Any], { key: ( cls._merge(*values, merger=merger, _context=_context + (key,)) @@ -433,7 +428,7 @@ def __merger_default(*items: Any) -> Union[list, set, dict]: "Please specify `merger` explicitly." ) - def __dir__(self) -> List[str]: + def __dir__(self) -> list[str]: return [*super().__dir__(), *self._structure] def __getattr__(self, attr: str) -> Any: @@ -451,7 +446,7 @@ def __setattr__(self, attr: str, value: Any) -> None: return self._structure[attr] = self.__prepare_item(attr, value) - def __lookup_path(self, path: Tuple[Union[str, int], ...]) -> Any: + def __lookup_path(self, path: tuple[Union[str, int], ...]) -> Any: obj = self idx = 0 diff --git a/formulaic/utils/variables.py b/formulaic/utils/variables.py index 748b8bee..84f2a5e3 100644 --- a/formulaic/utils/variables.py +++ b/formulaic/utils/variables.py @@ -3,8 +3,9 @@ import ast from collections import deque +from collections.abc import Iterable, Mapping from enum import Enum -from typing import Dict, Iterable, List, Mapping, Optional, Set, Union +from typing import Optional, Union from formulaic.utils.layered_mapping import LayeredMapping @@ -14,7 +15,7 @@ class Role(str, Enum): VALUE = "value" CALLABLE = "callable" - roles: Set[Role] + roles: set[Role] source: Optional[str] def __new__( @@ -30,8 +31,8 @@ def __new__( return s @classmethod - def union(cls, *variable_sets: Iterable[Variable]) -> Set[Variable]: - variables: Dict[Variable, Variable] = {} + def union(cls, *variable_sets: Iterable[Variable]) -> set[Variable]: + variables: dict[Variable, Variable] = {} for variable_set in variable_sets: for variable in variable_set: if variable in variables: @@ -49,7 +50,7 @@ def get_expression_variables( expr: Union[str, ast.AST], context: Optional[Mapping] = None, aliases: Optional[Mapping] = None, -) -> Set[Variable]: +) -> set[Variable]: """ Extract the variables that are used in the nominated Python expression. @@ -73,8 +74,8 @@ def get_expression_variables( return set(variables) -def _get_ast_node_variables(node: ast.AST, aliases: Mapping) -> List[Variable]: - variables: List[Variable] = [] +def _get_ast_node_variables(node: ast.AST, aliases: Mapping) -> list[Variable]: + variables: list[Variable] = [] todo = deque([node]) while todo: diff --git a/pyproject.toml b/pyproject.toml index 509f79ed..ac7f49d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,8 +129,10 @@ select = [ "W", # pycodestyle "S", # bandit "I001", # import sorting + "UP", # syntax upgrades # "D", # documentation ] +pyupgrade = { keep-runtime-typing = true } ignore = [ "C901", diff --git a/tests/parser/test_parser.py b/tests/parser/test_parser.py index 38684b88..1972137b 100644 --- a/tests/parser/test_parser.py +++ b/tests/parser/test_parser.py @@ -1,7 +1,6 @@ import pickle import re from io import BytesIO -from typing import List from xml.etree.ElementInclude import include import pytest @@ -159,7 +158,7 @@ class TestFormulaParser: @pytest.mark.parametrize("formula,terms", FORMULA_TO_TERMS.items()) def test_to_terms(self, formula, terms): - generated_terms: Structured[List[Term]] = PARSER.get_terms( + generated_terms: Structured[list[Term]] = PARSER.get_terms( formula, context=PARSER_CONTEXT ) if generated_terms._has_keys: diff --git a/tests/parser/types/test_ordered_set.py b/tests/parser/types/test_ordered_set.py index 48adebb8..25d3900d 100644 --- a/tests/parser/types/test_ordered_set.py +++ b/tests/parser/types/test_ordered_set.py @@ -9,5 +9,5 @@ def test_ordered_set(): assert repr(OrderedSet(["z", "b", "c"])) == "{'z', 'b', 'c'}" assert OrderedSet(["z", "k"]) | ["a", "b"] == OrderedSet(["z", "k", "a", "b"]) - assert OrderedSet(("z", "k")) - ("z",) == OrderedSet(("k")) + assert OrderedSet(("z", "k")) - ("z",) == OrderedSet("k") assert ["b"] | OrderedSet("a") == OrderedSet("ba")