diff --git a/mediafile/__init__.py b/mediafile/__init__.py index 9ae1c95..15baafe 100644 --- a/mediafile/__init__.py +++ b/mediafile/__init__.py @@ -37,12 +37,14 @@ import os import re -import mutagen -import mutagen.mp3 +from mutagen._file import File as MutagenFileFactory +from mutagen.mp3 import BitrateMode +from ._types import MutagenFile from .constants import TYPES, ImageType from .exceptions import FileTypeError, MutagenError, UnreadableFileError from .fields import ( + BaseMediaField, CoverArtField, DateField, DateItemField, @@ -92,6 +94,8 @@ class MediaFile: metadata. """ + mgfile: MutagenFile + @loadfile() def __init__(self, filething, id3v23=False): """Constructs a new `MediaFile` reflecting the provided file. @@ -106,7 +110,7 @@ def __init__(self, filething, id3v23=False): """ self.filething = filething - self.mgfile = mutagen_call("open", self.filename, mutagen.File, filething) + self.mgfile = mutagen_call("open", self.filename, MutagenFileFactory, filething) if self.mgfile is None: # Mutagen couldn't guess the type @@ -230,7 +234,7 @@ def fields(cls): :class:`MediaField`). """ for property, descriptor in cls.__dict__.items(): - if isinstance(descriptor, MediaField): + if isinstance(descriptor, BaseMediaField): if isinstance(property, bytes): # On Python 2, class field names are bytes. This method # produces text strings. 
# Values written in place of ``None`` when a typed field is cleared.
# Lookup is by exact type, so subclasses of the listed types are not
# matched; ``bool`` has its own entry even though it subclasses ``int``.
NULL_VALUES: dict[type, object] = {
    int: 0,
    bool: False,
    str: "",
    float: 0.0,
}


def null_value(out_type: type[T]) -> T | None:
    """Get an appropriate "null" value for the given type.

    :param out_type: the exact type to look up in ``NULL_VALUES``.
    :return: the registered null value for ``out_type`` (``0``,
        ``False``, ``""`` or ``0.0``), or ``None`` when the type has no
        entry.
    """
    # Cast to ``T | None`` rather than ``T``: the lookup legitimately
    # misses for unregistered types, and the type checker must not be
    # told that the result is always a ``T``.
    return cast("T | None", NULL_VALUES.get(out_type))
class BaseMediaField(ABC, Generic[T, S]):
    """Abstract base class for media file metadata field descriptors.

    Subclasses must implement ``__get__`` and ``__set__``. A default
    ``__delete__`` is provided that deletes the field through every
    storage style applicable to the file; subclasses may override it.
    """

    # Storage styles this field can use; assigned by subclasses.
    _styles: Sequence[S]

    def styles(self, mutagen_file: MutagenFile) -> Iterable[S]:
        """Yield the storage styles of this field that can handle the
        MediaFile's format.

        A style applies when the class name of ``mutagen_file`` is listed
        in the style's ``formats``.
        """
        file_format = mutagen_file.__class__.__name__
        for style in self._styles:
            if file_format in style.formats:
                yield style

    @abstractmethod
    def __get__(
        self, mediafile: MediaFile, owner: object | None = None
    ) -> T | None: ...

    @abstractmethod
    def __set__(self, mediafile: MediaFile, value: T | None) -> None: ...

    def __delete__(self, mediafile: MediaFile) -> None:
        """Delete the field via every storage style that applies to the
        file's format.
        """
        for style in self.styles(mediafile.mgfile):
            style.delete(mediafile.mgfile)
+ """ + + if value is None: + value = null_value(out_type) + for style in styles: + if not style.read_only: + style.set(mediafile.mgfile, value) + + +def _get_data_from_styles( + mediafile: MediaFile, + styles: Iterable[StorageStyle], + out_type: type[T], +) -> T | None: + """Helper to get data from multiple styles. + + Mainly created to avoid code duplication and keep type checking happy. + """ + out = None + for style in styles: + out = style.get(mediafile.mgfile) + if out: + break + return safe_cast(out_type, out) + + +class MediaField(BaseMediaField[T, StorageStyle]): """A descriptor providing access to a particular (abstract) metadata field. """ - def __init__(self, *styles, **kwargs): + out_type: type[T] + + def __init__( + self, + *styles: StorageStyle, + out_type: type[T] = DEFAULT_OUT_TYPE, + ): """Creates a new MediaField. :param styles: `StorageStyle` instances that describe the strategy @@ -34,51 +122,26 @@ def __init__(self, *styles, **kwargs): getting this property. """ - self.out_type = kwargs.get("out_type", str) + self.out_type = out_type self._styles = styles - def styles(self, mutagen_file): - """Yields the list of storage styles of this field that can - handle the MediaFile's format. 
- """ - for style in self._styles: - if mutagen_file.__class__.__name__ in style.formats: - yield style - - def __get__(self, mediafile, owner=None): - out = None - for style in self.styles(mediafile.mgfile): - out = style.get(mediafile.mgfile) - if out: - break - return safe_cast(self.out_type, out) - - def __set__(self, mediafile, value): - if value is None: - value = self._none_value() - for style in self.styles(mediafile.mgfile): - if not style.read_only: - style.set(mediafile.mgfile, value) - - def __delete__(self, mediafile): - for style in self.styles(mediafile.mgfile): - style.delete(mediafile.mgfile) + def __get__(self, mediafile: MediaFile, owner: object | None = None) -> T | None: + return _get_data_from_styles( + mediafile, + self.styles(mediafile.mgfile), + self.out_type, + ) - def _none_value(self): - """Get an appropriate "null" value for this field's type. This - is used internally when setting the field to None. - """ - if self.out_type is int: - return 0 - elif self.out_type is float: - return 0.0 - elif self.out_type is bool: - return False - elif self.out_type is str: - return "" + def __set__(self, mediafile: MediaFile, value: T | None) -> None: + return _set_data_in_styles( + value, + mediafile, + self.styles(mediafile.mgfile), + self.out_type, + ) -class ListMediaField(MediaField): +class ListMediaField(BaseMediaField[list[T], ListStorageStyle]): """Property descriptor that retrieves a list of multiple values from a tag. @@ -86,19 +149,46 @@ class ListMediaField(MediaField): strategies to do the actual work. """ - def __get__(self, mediafile, _=None): + _styles: Sequence[ListStorageStyle] + + def __init__( + self, + *styles: ListStorageStyle, + out_type: type[T] = DEFAULT_OUT_TYPE, + ): + """Creates a new ListMediaField. + + :param styles: `ListStorageStyle` instances that describe the + strategy for reading and writing the field in + particular formats. There must be at least one + style for each possible file format. 
def __get__(
    self, mediafile: MediaFile, owner: object | None = None
) -> list[T] | None:
    """Return the list of values, or None if no values are set.

    The applicable storage styles are tried in order and the first one
    that yields a non-empty list wins. A returned list is therefore never
    empty; when no style has any values, None is returned instead.

    :param mediafile: the file object whose ``mgfile`` is read.
    :param owner: unused; accepted for the descriptor protocol.
    """
    for style in self.styles(mediafile.mgfile):
        values = style.get_list(mediafile.mgfile)
        if not values:
            # Nothing stored under this style: fall back to the next one
            # instead of giving up after the first applicable style.
            continue
        converted = safe_cast_list(self.out_type, values)
        if converted:
            return converted
    return None
""" - super().__init__(*date_styles) - year_style = kwargs.get("year", None) - if year_style: - self._year_field = MediaField(*year_style) - - def __get__(self, mediafile, owner=None): + # The OOP pattern is janky here, while + # the base class should handle strings + self._styles = date_styles + if year is not None: + self._year_field = MediaField(*year, out_type=str) + + def __get__( + self, mediafile: MediaFile, owner: object | None = None + ) -> None | datetime.date: year, month, day = self._get_date_tuple(mediafile) if not year: return None @@ -137,25 +234,31 @@ def __get__(self, mediafile, owner=None): except ValueError: # Out of range values. return None - def __set__(self, mediafile, date): + def __set__(self, mediafile: MediaFile, date: datetime.date | None) -> None: if date is None: self._set_date_tuple(mediafile, None, None, None) else: self._set_date_tuple(mediafile, date.year, date.month, date.day) - def __delete__(self, mediafile): + def __delete__(self, mediafile: MediaFile) -> None: super().__delete__(mediafile) if hasattr(self, "_year_field"): self._year_field.__delete__(mediafile) - def _get_date_tuple(self, mediafile): + def _get_date_tuple(self, mediafile: MediaFile) -> list[int | None]: """Get a 3-item sequence representing the date consisting of a - year, month, and day number. Each number is either an integer or - None. + year, month, and day number. """ - # Get the underlying data and split on hyphens and slashes. - datestring = super().__get__(mediafile, None) - if isinstance(datestring, str): + # Get the underlying data as string + datestring = _get_data_from_styles( + mediafile, + self.styles(mediafile.mgfile), + str, + ) + + # Split the date string into components. 
+ items: list[str | None] + if datestring: datestring = re.sub(r"[Tt ].*$", "", str(datestring)) items = re.split("[-/]", str(datestring)) else: @@ -172,15 +275,15 @@ def _get_date_tuple(self, mediafile): items[0] = self._year_field.__get__(mediafile) # Convert each component to an integer if possible. - items_ = [] - for item in items: - try: - items_.append(int(item)) - except (TypeError, ValueError): - items_.append(None) - return items_ - - def _set_date_tuple(self, mediafile, year, month=None, day=None): + return [int(x) if x and x.isdigit() else None for x in items] + + def _set_date_tuple( + self, + mediafile: MediaFile, + year: int | None, + month: int | None = None, + day: int | None = None, + ) -> None: """Set the value of the field given a year, month, and day number. Each number can be an integer or None to indicate an unset component. @@ -194,44 +297,50 @@ def _set_date_tuple(self, mediafile, year, month=None, day=None): date.append(f"{int(month):02d}") if month and day: date.append(f"{int(day):02d}") - date = map(str, date) - super().__set__(mediafile, "-".join(date)) + + _set_data_in_styles( + "-".join(map(str, date)), + mediafile, + self.styles(mediafile.mgfile), + str, + ) if hasattr(self, "_year_field"): - self._year_field.__set__(mediafile, year) + self._year_field.__set__(mediafile, safe_cast(str, year)) - def year_field(self): + def year_field(self) -> DateItemField: return DateItemField(self, 0) - def month_field(self): + def month_field(self) -> DateItemField: return DateItemField(self, 1) - def day_field(self): + def day_field(self) -> DateItemField: return DateItemField(self, 2) -class DateItemField(MediaField): +class DateItemField(MediaField[int]): """Descriptor that gets and sets constituent parts of a `DateField`: the month, day, or year. 
""" - def __init__(self, date_field, item_pos): + def __init__(self, date_field: DateField, item_pos: int): self.date_field = date_field self.item_pos = item_pos + super().__init__(out_type=int) - def __get__(self, mediafile, _): + def __get__(self, mediafile: MediaFile, owner: object | None = None) -> int | None: return self.date_field._get_date_tuple(mediafile)[self.item_pos] - def __set__(self, mediafile, value): + def __set__(self, mediafile: MediaFile, value: int | None) -> None: items = self.date_field._get_date_tuple(mediafile) items[self.item_pos] = value self.date_field._set_date_tuple(mediafile, *items) - def __delete__(self, mediafile): + def __delete__(self, mediafile: MediaFile) -> None: self.__set__(mediafile, None) -class CoverArtField(MediaField): +class CoverArtField(MediaField[bytes]): """A descriptor that provides access to the *raw image data* for the cover image on a file. This is used for backwards compatibility: the full `ImageListField` provides richer `Image` objects. @@ -240,10 +349,12 @@ class CoverArtField(MediaField): cover. 
""" - def __init__(self): + def __init__(self) -> None: pass - def __get__(self, mediafile, _): + def __get__( + self, mediafile: MediaFile, owner: object | None = None + ) -> bytes | None: candidates = mediafile.images if candidates: return self.guess_cover_image(candidates).data @@ -251,7 +362,7 @@ def __get__(self, mediafile, _): return None @staticmethod - def guess_cover_image(candidates): + def guess_cover_image(candidates: list[Image]) -> Image: if len(candidates) == 1: return candidates[0] try: @@ -259,17 +370,17 @@ def guess_cover_image(candidates): except StopIteration: return candidates[0] - def __set__(self, mediafile, data): + def __set__(self, mediafile: MediaFile, data: bytes | None) -> None: if data: mediafile.images = [Image(data=data)] else: mediafile.images = [] - def __delete__(self, mediafile): + def __delete__(self, mediafile: MediaFile) -> None: delattr(mediafile, "images") -class QNumberField(MediaField): +class QNumberField(MediaField[float]): """Access integer-represented Q number fields. Access a fixed-point fraction as a float. The stored value is shifted by @@ -277,23 +388,34 @@ class QNumberField(MediaField): simple integer. 
""" - def __init__(self, fraction_bits, *args, **kwargs): - super().__init__(out_type=int, *args, **kwargs) + __fraction_bits: int + + def __init__( + self, + fraction_bits: int, + *styles: StorageStyle, + ): + super().__init__( + *styles, + out_type=int, + ) self.__fraction_bits = fraction_bits - def __get__(self, mediafile, owner=None): - q_num = super().__get__(mediafile, owner) + def __get__( + self, mediafile: MediaFile, owner: object | None = None + ) -> float | None: + q_num: int | None = super().__get__(mediafile, owner) # type: ignore[assignment] if q_num is None: return None - return q_num / pow(2, self.__fraction_bits) + return q_num / 2**self.__fraction_bits # type: ignore[no-any-return] - def __set__(self, mediafile, value): - q_num = round(value * pow(2, self.__fraction_bits)) - q_num = int(q_num) # needed for py2.7 - super().__set__(mediafile, q_num) + def __set__(self, mediafile: MediaFile, value: float | None) -> None: + if value is not None: + value = round(value * 2**self.__fraction_bits) + super().__set__(mediafile, value) -class ImageListField(ListMediaField): +class ImageListField(ListMediaField[Image]): """Descriptor to access the list of images embedded in tags. The getter returns a list of `Image` instances obtained from @@ -301,7 +423,7 @@ class ImageListField(ListMediaField): written to the tags. """ - def __init__(self): + def __init__(self) -> None: # The storage styles used here must implement the # `ListStorageStyle` interface and get and set lists of # `Image`s. 
diff --git a/mediafile/utils/image.py b/mediafile/utils/image.py index cd37ef2..3d6d5a3 100644 --- a/mediafile/utils/image.py +++ b/mediafile/utils/image.py @@ -9,7 +9,7 @@ log = logging.getLogger(__name__) -def image_mime_type(data): +def image_mime_type(data) -> str | None: """Return the MIME type of the image data (a bytestring).""" return filetype.guess_mime(data) @@ -32,7 +32,16 @@ class Image: the binary data """ - def __init__(self, data, desc=None, type=None): + data: bytes + desc: str | None + type: ImageType | None + + def __init__( + self, + data: bytes, + desc: str | None = None, + type: ImageType | None = None, + ) -> None: assert isinstance(data, bytes) if desc is not None: assert isinstance(desc, str) @@ -47,12 +56,13 @@ def __init__(self, data, desc=None, type=None): self.type = type @property - def mime_type(self): + def mime_type(self) -> str | None: if self.data: return image_mime_type(self.data) + return None @property - def type_index(self): + def type_index(self) -> int: if self.type is None: # This method is used when a tag format requires the type # index to be set, so we return "other" as the default value. diff --git a/mediafile/utils/type_conversion.py b/mediafile/utils/type_conversion.py index 95398ea..8693ccb 100644 --- a/mediafile/utils/type_conversion.py +++ b/mediafile/utils/type_conversion.py @@ -1,7 +1,10 @@ import re +from typing import Any, TypeVar, cast +T = TypeVar("T") -def safe_cast(out_type, val): + +def safe_cast(out_type: type[T], val: Any) -> T | None: """Try to covert val to out_type but never raise an exception. If the value does not exist, return None. Or, if the value @@ -15,7 +18,7 @@ def safe_cast(out_type, val): if out_type is int: if isinstance(val, int) or isinstance(val, float): # Just a number. - return int(val) + return cast(T, int(val)) else: # Process any other type as a string. 
def safe_cast_list(out_type: type[T], val: list[Any] | None) -> list[T] | None:
    """Cast every element of a list to ``out_type``.

    :param out_type: the type each element is converted to with
        ``safe_cast``.
    :param val: a list or tuple of raw values, or None.
    :return: None if ``val`` is None. Otherwise a new list holding the
        converted elements, in order, without the elements whose
        conversion produced None.
    :raises ValueError: if ``val`` is neither None nor a list or tuple.
    """
    if val is None:
        return None

    if not isinstance(val, (list, tuple)):
        raise ValueError("Value is not a list or tuple")

    # Discard only the elements that converted to None. Filtering on
    # truthiness would also throw away legitimate entries such as ``0``,
    # ``0.0``, ``False`` or ``""``.
    converted = (safe_cast(out_type, item) for item in val)
    return [item for item in converted if item is not None]