Skip to content

Commit

Permalink
chore: Implement PEP 563 deferred annotation resolution (#333)
Browse files Browse the repository at this point in the history
- Add `from __future__ import annotations` to defer annotation resolution and reduce unnecessary runtime computations during type checking.
- Enable Ruff checks for PEP-compliant annotations:
  - [non-pep585-annotation (UP006)](https://docs.astral.sh/ruff/rules/non-pep585-annotation/)
  - [non-pep604-annotation (UP007)](https://docs.astral.sh/ruff/rules/non-pep604-annotation/)

For more details on PEP 563, see: https://peps.python.org/pep-0563/
  • Loading branch information
tony authored Jan 4, 2025
2 parents 0d0af36 + 090aa0a commit b44390a
Show file tree
Hide file tree
Showing 21 changed files with 178 additions and 109 deletions.
11 changes: 11 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ $ pipx install --suffix=@next unihan-etl --pip-args '\--pre' --force

<!-- Maintainers, insert changes / features for the next release here -->

### Development

#### chore: Implement PEP 563 deferred annotation resolution (#333)

- Add `from __future__ import annotations` to defer annotation resolution and reduce unnecessary runtime computations during type checking.
- Enable Ruff checks for PEP-compliant annotations:
- [non-pep585-annotation (UP006)](https://docs.astral.sh/ruff/rules/non-pep585-annotation/)
- [non-pep604-annotation (UP007)](https://docs.astral.sh/ruff/rules/non-pep604-annotation/)

For more details on PEP 563, see: https://peps.python.org/pep-0563/

## unihan-etl 0.37.0 (2024-12-21)

_Maintenance release: No bug fixes or new features._
Expand Down
6 changes: 5 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
https://docs.pytest.org/en/stable/deprecations.html
"""

import pathlib
from __future__ import annotations

import shutil
import typing as t

Expand All @@ -17,6 +18,9 @@

from unihan_etl.pytest_plugin import USING_ZSH

if t.TYPE_CHECKING:
import pathlib

pytest_plugins = ["pytester"]


Expand Down
10 changes: 6 additions & 4 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# flake8: NOQA: E501
"""Sphinx configuration for unihan-etl."""

from __future__ import annotations

import inspect
import pathlib
import sys
Expand Down Expand Up @@ -76,7 +78,7 @@
html_favicon = "_static/favicon.ico"
html_theme = "furo"
html_theme_path: list[str] = []
html_theme_options: dict[str, t.Union[str, list[dict[str, str]]]] = {
html_theme_options: dict[str, str | list[dict[str, str]]] = {
"light_logo": "img/cihai.svg",
"dark_logo": "img/cihai.svg",
"footer_icons": [
Expand Down Expand Up @@ -153,7 +155,7 @@
}


def linkcode_resolve(domain: str, info: dict[str, str]) -> t.Union[None, str]:
def linkcode_resolve(domain: str, info: dict[str, str]) -> None | str:
"""
Determine the URL corresponding to Python object.
Expand Down Expand Up @@ -369,13 +371,13 @@ class TsvLexer(CsvLexer):
lexers["tsv"] = TsvLexer()


def remove_tabs_js(app: "Sphinx", exc: Exception) -> None:
def remove_tabs_js(app: Sphinx, exc: Exception) -> None:
"""Remove tabs.js from _static after build."""
# Fix for sphinx-inline-tabs#18
if app.builder.format == "html" and not exc:
tabs_js = pathlib.Path(app.builder.outdir) / "_static" / "tabs.js"
tabs_js.unlink(missing_ok=True)


def setup(app: "Sphinx") -> None:
def setup(app: Sphinx) -> None:
"""Configure Sphinx app hooks."""
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ exclude_lines = [
"if TYPE_CHECKING:",
"if t.TYPE_CHECKING:",
"@overload( |$)",
"from __future__ import annotations",
]

[tool.ruff]
Expand All @@ -202,16 +203,25 @@ select = [
"PERF", # Perflint
"RUF", # Ruff-specific rules
"D", # pydocstyle
"FA100", # future annotations
]
ignore = [
"COM812", # missing trailing comma, ruff format conflict
"SIM115", # Use a context manager for opening files
"UP031", # Use format specifiers instead of percent format
]
extend-safe-fixes = [
"UP006",
"UP007",
]
pyupgrade.keep-runtime-typing = false

[tool.ruff.lint.isort]
known-first-party = ["unihan_etl", "cihai"]
combine-as-imports = true
required-imports = [
"from __future__ import annotations",
]

[tool.ruff.lint.pydocstyle]
convention = "numpy"
Expand Down
2 changes: 2 additions & 0 deletions src/unihan_etl/__about__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Metadata for unihan-etl package."""

from __future__ import annotations

__title__ = "unihan-etl"
__package_name__ = "unihan_etl"
__description__ = "Export UNIHAN data of Chinese, Japanese, Korean to CSV, JSON or YAML"
Expand Down
2 changes: 2 additions & 0 deletions src/unihan_etl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Create structured, customized exports of UNIHAN."""

from __future__ import annotations

from .__about__ import __version__
4 changes: 3 additions & 1 deletion src/unihan_etl/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python
"""For accessing cihai as a package."""

from __future__ import annotations

import pathlib
import sys
import typing as t
Expand All @@ -13,7 +15,7 @@
_ExitCode: TypeAlias = t.Optional[t.Union[str, int]]


def run() -> "_ExitCode":
def run() -> _ExitCode:
"""Execute unihan-etl via CLI entrypoint."""
base = pathlib.Path(__file__).parent.parent
sys.path.insert(0, str(base))
Expand Down
6 changes: 4 additions & 2 deletions src/unihan_etl/_internal/app_dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
- Tilde expansion is expanded via :func:`os.path.expanduser`
"""

from __future__ import annotations

import dataclasses
import os
import pathlib
Expand Down Expand Up @@ -67,7 +69,7 @@ class AppDirs:
PosixPath('/var/cache')
"""

_app_dirs: dataclasses.InitVar["BaseAppDirs"]
_app_dirs: dataclasses.InitVar[BaseAppDirs]
user_data_dir: pathlib.Path = dataclasses.field(default=MISSING_DIR)
site_data_dir: pathlib.Path = dataclasses.field(default=MISSING_DIR)
user_config_dir: pathlib.Path = dataclasses.field(default=MISSING_DIR)
Expand All @@ -76,7 +78,7 @@ class AppDirs:
user_state_dir: pathlib.Path = dataclasses.field(default=MISSING_DIR)
user_log_dir: pathlib.Path = dataclasses.field(default=MISSING_DIR)

def __post_init__(self, _app_dirs: "BaseAppDirs") -> None:
def __post_init__(self, _app_dirs: BaseAppDirs) -> None:
"""Initialize attributes for AppDirs object."""
dir_attrs = [key for key in _app_dirs.__dir__() if key.endswith("_dir")]
dir_mapping: dict[str, str] = {k: getattr(_app_dirs, k) for k in dir_attrs}
Expand Down
9 changes: 7 additions & 2 deletions src/unihan_etl/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Constants for unihan_etl."""

from __future__ import annotations

import importlib.util
import typing as t

from appdirs import AppDirs as BaseAppDirs

Expand All @@ -9,9 +12,11 @@
__package_name__,
)
from unihan_etl._internal.app_dirs import AppDirs
from unihan_etl.types import ColumnDataTuple
from unihan_etl.util import get_fields

if t.TYPE_CHECKING:
from unihan_etl.types import ColumnDataTuple

#: Dictionary of tuples mapping locations of files to fields
UNIHAN_MANIFEST = {
"Unihan_DictionaryIndices.txt": (
Expand Down Expand Up @@ -248,7 +253,7 @@
#: Filepath to download Zip file.
UNIHAN_ZIP_PATH = WORK_DIR / "Unihan.zip"
#: Default Unihan fields
UNIHAN_FIELDS: "ColumnDataTuple" = tuple(get_fields(UNIHAN_MANIFEST))
UNIHAN_FIELDS: ColumnDataTuple = tuple(get_fields(UNIHAN_MANIFEST))
#: Allowed export types
ALLOWED_EXPORT_TYPES = ["json", "csv"]

Expand Down
51 changes: 27 additions & 24 deletions src/unihan_etl/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/env python
"""Download + ETL UNIHAN into structured format and export it."""

from __future__ import annotations

import argparse
import codecs
import csv
Expand All @@ -13,7 +15,6 @@
import sys
import typing as t
import zipfile
from collections.abc import Mapping, Sequence
from urllib.request import urlretrieve

from unihan_etl import expansion
Expand All @@ -37,6 +38,8 @@
from unihan_etl.util import _dl_progress, get_fields, ucn_to_unicode

if t.TYPE_CHECKING:
from collections.abc import Mapping, Sequence

from typing_extensions import TypeGuard

from unihan_etl.types import (
Expand Down Expand Up @@ -70,7 +73,7 @@ def in_fields(

def filter_manifest(
files: list[str],
) -> "UntypedUnihanData":
) -> UntypedUnihanData:
"""Return filtered :attr:`~.UNIHAN_MANIFEST` from list of file names."""
return {f: UNIHAN_MANIFEST[f] for f in files}

Expand Down Expand Up @@ -210,7 +213,7 @@ def get_parser() -> argparse.ArgumentParser:
return parser


def has_valid_zip(zip_path: "StrPath") -> bool:
def has_valid_zip(zip_path: StrPath) -> bool:
"""Return True if valid zip exists.
Parameters
Expand Down Expand Up @@ -255,10 +258,10 @@ def zip_has_files(files: list[str], zip_file: zipfile.ZipFile) -> bool:


def download(
url: "StrPath",
url: StrPath,
dest: pathlib.Path,
urlretrieve_fn: "UrlRetrieveFn" = urlretrieve,
reporthook: t.Optional["ReportHookFn"] = None,
urlretrieve_fn: UrlRetrieveFn = urlretrieve,
reporthook: ReportHookFn | None = None,
cache: bool = True,
) -> pathlib.Path:
"""Download UNIHAN zip from URL to destination.
Expand Down Expand Up @@ -304,8 +307,8 @@ def not_downloaded() -> bool:


def load_data(
files: Sequence[t.Union[pathlib.Path, str]],
) -> "fileinput.FileInput[t.Any]":
files: Sequence[pathlib.Path | str],
) -> fileinput.FileInput[t.Any]:
"""Extract zip and process information into CSV's.
Parameters
Expand Down Expand Up @@ -350,9 +353,9 @@ def extract_zip(zip_path: pathlib.Path, dest_dir: pathlib.Path) -> zipfile.ZipFi


def normalize(
raw_data: "fileinput.FileInput[t.Any]",
raw_data: fileinput.FileInput[t.Any],
fields: Sequence[str],
) -> "UntypedNormalizedData":
) -> UntypedNormalizedData:
"""Return normalized data from a UNIHAN data files.
Parameters
Expand Down Expand Up @@ -391,7 +394,7 @@ def normalize(
return list(items.values())


def expand_delimiters(normalized_data: "UntypedNormalizedData") -> "ExpandedExport":
def expand_delimiters(normalized_data: UntypedNormalizedData) -> ExpandedExport:
"""Return expanded multi-value fields in UNIHAN.
Parameters
Expand All @@ -417,9 +420,9 @@ def expand_delimiters(normalized_data: "UntypedNormalizedData") -> "ExpandedExpo


def listify(
data: "UntypedNormalizedData",
data: UntypedNormalizedData,
fields: Sequence[str],
) -> "ListifiedExport":
) -> ListifiedExport:
"""Convert tabularized data to a CSV-friendly list.
Parameters
Expand All @@ -434,9 +437,9 @@ def listify(


def export_csv(
data: "UntypedNormalizedData",
destination: "StrPath",
fields: "ColumnData",
data: UntypedNormalizedData,
destination: StrPath,
fields: ColumnData,
) -> None:
"""Export UNIHAN in flattened, CSV format."""
listified_data = listify(data, fields)
Expand All @@ -447,14 +450,14 @@ def export_csv(
log.info(f"Saved output to: {destination}")


def export_json(data: "UntypedNormalizedData", destination: "StrPath") -> None:
def export_json(data: UntypedNormalizedData, destination: StrPath) -> None:
"""Export UNIHAN in JSON format."""
with codecs.open(str(destination), "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
log.info(f"Saved output to: {destination}")


def export_yaml(data: "UntypedNormalizedData", destination: "StrPath") -> None:
def export_yaml(data: UntypedNormalizedData, destination: StrPath) -> None:
"""Export UNIHAN in YAML format."""
import yaml

Expand All @@ -470,7 +473,7 @@ def is_default_option(field_name: str, val: t.Any) -> bool:

def validate_options(
options: Options,
) -> "TypeGuard[Options]":
) -> TypeGuard[Options]:
"""Validate unihan-etl options."""
if not is_default_option("input_files", options.input_files) and is_default_option(
"fields",
Expand Down Expand Up @@ -512,7 +515,7 @@ class Packager:

def __init__(
self,
options: t.Union[Options, "Mapping[str, t.Any]"] = DEFAULT_OPTIONS,
options: Options | Mapping[str, t.Any] = DEFAULT_OPTIONS,
) -> None:
"""Initialize UNIHAN Packager.
Expand Down Expand Up @@ -558,7 +561,7 @@ def download(self, urlretrieve_fn: t.Any = urlretrieve) -> None:
):
extract_zip(self.options.zip_path, self.options.work_dir)

def export(self) -> t.Union[None, "UntypedNormalizedData"]:
def export(self) -> None | UntypedNormalizedData:
"""Extract zip and process information into CSV's."""
fields = list(self.options.fields)
for k in INDEX_FIELDS:
Expand Down Expand Up @@ -604,7 +607,7 @@ def export(self) -> t.Union[None, "UntypedNormalizedData"]:
return None

@classmethod
def from_cli(cls, argv: Sequence[str]) -> "Packager":
def from_cli(cls, argv: Sequence[str]) -> Packager:
"""Create Packager instance from CLI :mod:`argparse` arguments.
Parameters
Expand All @@ -630,8 +633,8 @@ def from_cli(cls, argv: Sequence[str]) -> "Packager":


def setup_logger(
logger: t.Optional[logging.Logger] = None,
level: "LogLevel" = "DEBUG",
logger: logging.Logger | None = None,
level: LogLevel = "DEBUG",
) -> None:
"""Configure logger for CLI use.
Expand Down
Loading

0 comments on commit b44390a

Please sign in to comment.