diff --git a/iscc_sdk/audio.py b/iscc_sdk/audio.py index edb655a..24bcab2 100644 --- a/iscc_sdk/audio.py +++ b/iscc_sdk/audio.py @@ -2,9 +2,7 @@ import shutil import tempfile -from os.path import join, basename -from typing import Optional - +from pathlib import Path from PIL import Image, ImageEnhance from loguru import logger as log import json @@ -42,16 +40,16 @@ def audio_thumbnail(fp): - # type: (str) -> Optional[Image.Image] + # type: (str|Path) -> Image.Image|None """ Create a thumbnail from embedded cover art. - :param str fp: Filepath to audio file. + :param fp: Filepath to audio file. :return: Thumbnail image as PIL Image object - :rtype: Image.Image|None """ - tempdir = tempfile.mkdtemp() - tempimg = join(tempdir, "cover.jpg") + fp = Path(fp) + tempdir = Path(tempfile.mkdtemp()) + tempimg = tempdir / "cover.jpg" cmd = ["-i", fp, "-an", "-vcodec", "copy", tempimg] size = idk.sdk_opts.image_thumbnail_size try: @@ -68,14 +66,14 @@ def audio_thumbnail(fp): def audio_meta_extract(fp): - # type: (str) -> dict + # type: (str|Path) -> dict """ Extract metadata from audio file. - :param str fp: Filepath to audio file. + :param fp: Filepath to audio file. :return: Metadata mapped to IsccMeta schema - :rtype: dict """ + fp = Path(fp) mapped = dict() done = set() @@ -86,7 +84,7 @@ def audio_meta_extract(fp): obj.close() except OSError as e: # pragma: no cover # This is a workaround for the issue that taglib requires exclusive access even for reading. 
- log.warning(f"Create tempfile for taglib access {basename(fp)}: {e}") + log.warning(f"Create tempfile for taglib access {fp.name}: {e}") try: with idk.TempFile(fp) as tmp_path: obj = taglib.File(tmp_path.as_posix()) @@ -94,7 +92,7 @@ def audio_meta_extract(fp): mapped["duration"] = obj.length obj.close() except Exception as e: - log.warning(f"Failed metadata extraction for {basename(fp)}: {e}") + log.warning(f"Failed metadata extraction for {fp.name}: {e}") return mapped for tag, mapped_field in AUDIO_META_MAP.items(): @@ -113,15 +111,15 @@ def audio_meta_extract(fp): def audio_meta_embed(fp, meta): - # type: (str, idk.IsccMeta) -> str + # type: (str|Path, idk.IsccMeta) -> str """ Embed metadata into a copy of the audio file. - :param str fp: Filepath to source audio file + :param fp: Filepath to source audio file :param IsccMeta meta: Metadata to embed into audio file :return: Filepath to new audio file with updated metadata - :rtype: str """ + fp = Path(fp) tdir = tempfile.mkdtemp() tfile = shutil.copy(fp, tdir) obj = taglib.File(tfile) @@ -142,14 +140,14 @@ def audio_meta_embed(fp, meta): def audio_features_extract(fp): - # type: (str) -> dict + # type: (str|Path) -> dict """ Exctracts chromprint fingerprint. - :param str fp: Filepath + :param fp: Filepath :return: A dict with `duration` in seconds and `fingerprint` 32-bit integers - :rtype: dict """ + fp = Path(fp) args = ["-raw", "-json", "-signed", "-length", "0", fp] proc = idk.run_fpcalc(args) result = json.loads(proc.stdout) diff --git a/iscc_sdk/docx_.py b/iscc_sdk/docx_.py index 32c3881..965f832 100644 --- a/iscc_sdk/docx_.py +++ b/iscc_sdk/docx_.py @@ -1,4 +1,6 @@ import tempfile +from pathlib import Path + from docx import Document import iscc_sdk as idk import shutil @@ -17,17 +19,18 @@ def docx_meta_embed(fp, meta): - # type: (str, idk.IsccMeta) -> str + # type: (str|Path, idk.IsccMeta) -> str """ - Embed metadata into a copy of the PDF file. 
- :param str fp: Filepath to source PDF file - :param IsccMeta meta: Metadata to embed into PDF - :return: Filepath to the new PDF file with updated metadata - :rtype: str + Embed metadata into a copy of the DOCX file. + + :param fp: Filepath to source DOCX file + :param meta: Metadata to embed into DOCX + :return: Filepath to the new DOCX file with updated metadata """ + fp = Path(fp) tempdir = tempfile.mkdtemp() tempdoc = shutil.copy(fp, tempdir) - doc = Document(fp) + doc = Document(fp.as_posix()) new_meta = doc.core_properties for iscc_field, docx_field in META_DOCX_MAP.items(): value = getattr(meta, iscc_field) diff --git a/iscc_sdk/epub.py b/iscc_sdk/epub.py index 414087f..f906874 100644 --- a/iscc_sdk/epub.py +++ b/iscc_sdk/epub.py @@ -3,6 +3,7 @@ import io import shutil import tempfile +from pathlib import Path import ebookmeta from PIL import Image, ImageEnhance @@ -16,15 +17,15 @@ def epub_thumbnail(fp): - # type: (str) -> Image.Image + # type: (str|Path) -> Image.Image """ Creat thumbnail from EPUB document cover image. - :param str fp: Filepath to EPUB document. + :param fp: Filepath to EPUB document. :return: Thumbnail image as PIL Image object - :rtype: Image.Image """ - meta = ebookmeta.get_metadata(fp) + fp = Path(fp) + meta = ebookmeta.get_metadata(fp.as_posix()) data = meta.cover_image_data img = Image.open(io.BytesIO(data)) size = idk.sdk_opts.image_thumbnail_size @@ -33,15 +34,15 @@ def epub_thumbnail(fp): def epub_meta_embed(fp, meta): - # type: (str, idk.IsccMeta) -> str + # type: (str|Path, idk.IsccMeta) -> str """ Embed metadata into a copy of the EPUB file. 
- :param str fp: Filepath to source EPUB file + :param fp: Filepath to source EPUB file :param IsccMeta meta: Metadata to embed into EPUB - :return: Filepath to the new PDF file with updated metadata - :rtype: str + :return: Filepath to the new EPUB file with updated metadata """ + fp = Path(fp) tempdir = tempfile.mkdtemp() tempepub = shutil.copy(fp, tempdir) new_meta = ebookmeta.get_metadata(tempepub) diff --git a/iscc_sdk/image.py b/iscc_sdk/image.py index 521984c..11ce36c 100644 --- a/iscc_sdk/image.py +++ b/iscc_sdk/image.py @@ -1,5 +1,7 @@ """*Image handling module*.""" +from pathlib import Path + import pillow_avif import base64 import io @@ -7,7 +9,6 @@ import sys import json import tempfile -from os.path import basename, join from typing import Sequence import jmespath from iscc_schema import IsccMeta @@ -40,9 +41,8 @@ def image_normalize(img): """ Normalize image for hash calculation. - :param Image.Image img: Pillow Image Object + :param img: Pillow Image Object :return: Normalized and flattened image as 1024-pixel array (from 32x32 gray pixels) - :rtype: Sequence[int] """ # Transpose image according to EXIF Orientation tag @@ -74,9 +74,8 @@ def image_exif_transpose(img): """ Transpose image according to EXIF Orientation tag - :param Image.Image img: Pillow Image Object + :param img: Pillow Image Object :return: EXIF transposed image - :rtype: Image.Image """ img = ImageOps.exif_transpose(img) log.debug(f"Image exif transpose applied") @@ -88,9 +87,8 @@ def image_fill_transparency(img): """ Add white background to image if it has alpha transparency. - :param Image.Image img: Pillow Image Object + :param img: Pillow Image Object :return: Image with transparency replaced by white background - :rtype: Image.Image """ if img.mode != "RGBA": img = img.convert("RGBA") @@ -106,9 +104,8 @@ def image_trim_border(img): Takes the upper left pixel as reference for border color. 
- :param Image.Image img: Pillow Image Object + :param img: Pillow Image Object :return: Image with uniform colored (empty) border removed. - :rtype: Image.Image """ bg = Image.new(img.mode, img.size, img.getpixel((0, 0))) @@ -122,14 +119,14 @@ def image_trim_border(img): def image_meta_extract(fp): - # type: (str) -> dict + # type: (str|Path) -> dict """ Extract metadata from image. - :param str fp: Filepath to image file. + :param fp: Filepath to image file. :return: Metadata mapped to IsccMeta schema - :rtype: dict """ + fp = Path(fp) args = ["--all", fp] result = idk.run_exiv2json(args) encoding = sys.stdout.encoding or "utf-8" @@ -163,15 +160,15 @@ def image_meta_extract(fp): def image_meta_embed(fp, meta): - # type: (str, IsccMeta) -> str + # type: (str|Path, IsccMeta) -> Path """ Embed metadata into a copy of the image file. - :param str fp: Filepath to source image file - :param IsccMeta meta: Metadata to embed into image + :param fp: Filepath to source image file + :param meta: Metadata to embed into image :return: Filepath to the new image file with updated metadata - :rtype: str """ + fp = Path(fp) cmdf = "reg iscc http://purl.org/iscc/schema\n" cmdf += "reg dc http://purl.org/dc/elements/1.1/\n" @@ -194,9 +191,9 @@ def image_meta_embed(fp, meta): cmdf += f"set Xmp.dc.rights {meta.rights}\n" # Create temp filepaths - tempdir = tempfile.mkdtemp() - metafile = join(tempdir, "meta.txt") - imagefile = shutil.copy(fp, tempdir) + tempdir = Path(tempfile.mkdtemp()) + metafile = tempdir / "meta.txt" + imagefile = Path(shutil.copy(fp, tempdir)) # Store metadata with open(metafile, "wt", encoding="utf-8") as outf: @@ -204,32 +201,32 @@ def image_meta_embed(fp, meta): # Embed metaadata args = ["-m", metafile, imagefile] - log.debug(f"Embedding {meta.dict(exclude_unset=True)} in {basename(imagefile)}") + log.debug(f"Embedding {meta.dict(exclude_unset=True)} in {imagefile.name}") idk.run_exiv2(args) return imagefile def image_meta_delete(fp): - # type: (str) -> None 
+ # type: (str|Path) -> None """ Delete all metadata from image. - :param str fp: Filepath to image file. - :rtype: None + :param fp: Filepath to image file. """ + fp = Path(fp) args = ["rm", fp] return idk.run_exiv2(args) def image_thumbnail(fp): - # type: (str) -> Image.Image + # type: (str|Path) -> Image.Image """ Create a thumbnail for an image. - :param str fp: Filepath to image file. + :param fp: Filepath to image file. :return: Thumbnail image as PIL Image object - :rtype: Image.Image """ + fp = Path(fp) size = idk.sdk_opts.image_thumbnail_size img = Image.open(fp) img.thumbnail((size, size), resample=idk.LANCZOS) @@ -241,9 +238,8 @@ def image_to_data_url(img): """ Convert PIL Image object to WebP Data-URL. - :param Image.Image img: PIL Image object to encode as WebP Data-URL. + :param img: PIL Image object to encode as WebP Data-URL. :return: Data-URL string - :rtype: str """ quality = idk.sdk_opts.image_thumbnail_quality raw = io.BytesIO() diff --git a/iscc_sdk/ipfs.py b/iscc_sdk/ipfs.py index c148b60..c1aad75 100644 --- a/iscc_sdk/ipfs.py +++ b/iscc_sdk/ipfs.py @@ -1,7 +1,7 @@ """IPFS wrapper""" import sys -from os.path import basename +from pathlib import Path import iscc_sdk as idk @@ -12,17 +12,17 @@ def ipfs_cidv1(fp, wrap=False): - # type: (str) -> str + # type: (str|Path) -> str """ Create default IPFS CIDv1 for file at filepath `fp`. If `wrap` is True, the file will be wrapped with a directory and the filname will be appended to the directory CIDv1 with a `/`. 
- :param str fp: Filepath + :param fp: Filepath :return: IPFS CIDv1 of the file - :rtype: str """ + fp = Path(fp) args = ["add", "--only-hash", "--cid-version=1", "--offline", "--quieter"] if wrap: args.append("--wrap-with-directory") @@ -31,19 +31,19 @@ def ipfs_cidv1(fp, wrap=False): encoding = sys.stdout.encoding or "utf-8" cid = result.stdout.decode(encoding).strip() if wrap: - cid += f"/{basename(fp)}" + cid += f"/{fp.name}" return cid def ipfs_cidv1_base16(fp): - # type: (str) -> str + # type: (str|Path) -> str """ Create IPFS CIDv1 with base16 encoding. - :param str fp: Filepath + :param fp: Filepath :return: IPFS CIDv1 of the file in base16 (hex) - :rtype: str """ + fp = Path(fp) args = [ "add", "--only-hash", diff --git a/iscc_sdk/main.py b/iscc_sdk/main.py index e5a1632..a3c6658 100644 --- a/iscc_sdk/main.py +++ b/iscc_sdk/main.py @@ -1,7 +1,7 @@ """*SDK main top-level functions*.""" from concurrent.futures import ThreadPoolExecutor -from os.path import basename +from pathlib import Path from PIL import Image import iscc_core as ic import iscc_sdk as idk @@ -21,16 +21,16 @@ def code_iscc(fp): - # type: (str) -> idk.IsccMeta + # type: (str|Path) -> idk.IsccMeta """ Generate ISCC-CODE. The ISCC-CODE is a composite of Meta, Content, Data and Instance Codes. - :param str fp: Filepath used for ISCC-CODE creation. + :param fp: Filepath used for ISCC-CODE creation. :return: ISCC metadata including ISCC-CODE and merged metadata from ISCC-UNITs. 
- :rtype: IsccMeta """ + fp = Path(fp) # Generate ISCC-UNITs in parallel with ThreadPoolExecutor() as executor: @@ -51,7 +51,7 @@ def code_iscc(fp): iscc_code = ic.gen_iscc_code_v0([meta.iscc, content.iscc, data.iscc, instance.iscc]) # Merge ISCC Metadata - iscc_meta = dict(filename=basename(fp)) + iscc_meta = dict(filename=fp.name) iscc_meta.update(instance.dict()) iscc_meta.update(data.dict()) iscc_meta.update(content.dict()) @@ -61,14 +61,14 @@ def code_iscc(fp): def code_meta(fp): - # type: (str) -> idk.IsccMeta + # type: (str|Path) -> idk.IsccMeta """ Generate Meta-Code from digital asset. - :param str fp: Filepath used for Meta-Code creation. + :param fp: Filepath used for Meta-Code creation. :return: ISCC metadata including Meta-Code and extracted metadata fields. - :rtype: IsccMeta """ + fp = Path(fp) meta = idk.extract_metadata(fp).dict() @@ -87,17 +87,16 @@ def code_meta(fp): def code_content(fp, extract_meta=None, create_thumb=None): - # type: (str, bool|None, bool|None) -> idk.IsccMeta + # type: (str|Path, bool|None, bool|None) -> idk.IsccMeta """ Detect mediatype and create corresponding Content-Code. - :param str fp: Filepath - :param bool|None extract_meta: Whether to extract metadata. - :param bool|None create_thumb: Whether to create a thumbnail. + :param fp: Filepath + :param extract_meta: Whether to extract metadata. + :param create_thumb: Whether to create a thumbnail. :return: Content-Code wrapped in ISCC metadata. - :rtype: IsccMeta """ - + fp = Path(fp) schema_org_map = { "text": "TextDigitalDocument", "image": "ImageObject", @@ -126,16 +125,16 @@ def code_content(fp, extract_meta=None, create_thumb=None): def code_text(fp, extract_meta=None, create_thumb=None): - # type: (str, bool|None, bool|None) -> idk.IsccMeta + # type: (str|Path, bool|None, bool|None) -> idk.IsccMeta """ Generate Content-Code Text. - :param str fp: Filepath used for Text-Code creation. - :param bool|None extract_meta: Whether to extract metadata. 
- :param bool|None create_thumb: Whether to create a thumbnail. + :param fp: Filepath used for Text-Code creation. + :param extract_meta: Whether to extract metadata. + :param create_thumb: Whether to create a thumbnail. :return: ISCC metadata including Text-Code. - :rtype: IsccMeta """ + fp = Path(fp) meta = dict() if extract_meta is None: @@ -162,16 +161,16 @@ def code_text(fp, extract_meta=None, create_thumb=None): def code_image(fp, extract_meta=None, create_thumb=None): - # type: (str, bool|None, bool|None) -> idk.IsccMeta + # type: (str|Path, bool|None, bool|None) -> idk.IsccMeta """ Generate Content-Code Image. - :param str fp: Filepath used for Image-Code creation. - :param bool|None extract_meta: Whether to extract metadata. - :param bool|None create_thumb: Whether to create a thumbnail. + :param fp: Filepath used for Image-Code creation. + :param extract_meta: Whether to extract metadata. + :param create_thumb: Whether to create a thumbnail. :return: ISCC metadata including Image-Code. - :rtype: IsccMeta """ + fp = Path(fp) meta = dict() if extract_meta is None: @@ -194,16 +193,16 @@ def code_image(fp, extract_meta=None, create_thumb=None): def code_audio(fp, extract_meta=None, create_thumb=None): - # type: (str, bool|None, bool|None) -> idk.IsccMeta + # type: (str|Path, bool|None, bool|None) -> idk.IsccMeta """ Generate Content-Code Audio. - :param str fp: Filepath used for Audio-Code creation. - :param bool|None extract_meta: Whether to extract metadata. - :param bool|None create_thumb: Whether to create a thumbnail. + :param fp: Filepath used for Audio-Code creation. + :param extract_meta: Whether to extract metadata. + :param create_thumb: Whether to create a thumbnail. :return: ISCC metadata including Audio-Code. 
- :rtype: IsccMeta """ + fp = Path(fp) meta = dict() if extract_meta is None: @@ -227,16 +226,16 @@ def code_audio(fp, extract_meta=None, create_thumb=None): def code_video(fp, extract_meta=None, create_thumb=None): - # type: (str) -> idk.IsccMeta + # type: (str|Path, bool|None, bool|None) -> idk.IsccMeta """ Generate Content-Code Video. - :param str fp: Filepath used for Video-Code creation. - :param bool|None extract_meta: Whether to extract metadata. - :param bool|None create_thumb: Whether to create a thumbnail. + :param fp: Filepath used for Video-Code creation. + :param extract_meta: Whether to extract metadata. + :param create_thumb: Whether to create a thumbnail. :return: ISCC metadata including Image-Code. - :rtype: IsccMeta """ + fp = Path(fp) meta = dict() if extract_meta is None: @@ -260,7 +259,7 @@ def code_video(fp, extract_meta=None, create_thumb=None): sig = idk.video_mp7sig_extract(fp) if idk.sdk_opts.video_store_mp7sig: - outp = fp + ".iscc.mp7sig" + outp = fp.as_posix() + ".iscc.mp7sig" with open(outp, "wb") as outf: outf.write(sig) @@ -278,17 +277,16 @@ def code_video(fp, extract_meta=None, create_thumb=None): def code_data(fp): - # type: (str) -> idk.IsccMeta + # type: (str|Path) -> idk.IsccMeta """ Create ISCC Data-Code. The Data-Code is a similarity preserving hash of the input data. - :param str fp: Filepath used for Data-Code creation. + :param fp: Filepath used for Data-Code creation. :return: ISCC metadata including Data-Code. - :rtype: IsccMeta """ - + fp = Path(fp) with open(fp, "rb") as stream: meta = ic.gen_data_code_v0(stream, bits=idk.core_opts.data_bits) @@ -296,7 +294,7 @@ def code_data(fp): def code_instance(fp): - # type: (str) -> idk.IsccMeta + # type: (str|Path) -> idk.IsccMeta """ Create ISCC Instance-Code. @@ -305,10 +303,10 @@ def code_instance(fp): to the data of the referenced media asset. For cryptographicaly secure integrity checking a full 256-bit multihash is provided with the `datahash` field. 
- :param str fp: Filepath used for Instance-Code creation. + :param fp: Filepath used for Instance-Code creation. :return: ISCC metadata including Instance-Code, datahash and filesize. - :rtype: IsccMeta """ + fp = Path(fp) with open(fp, "rb") as stream: meta = ic.gen_instance_code_v0(stream, bits=idk.core_opts.instance_bits) diff --git a/iscc_sdk/mediatype.py b/iscc_sdk/mediatype.py index 95e9961..c09fa72 100644 --- a/iscc_sdk/mediatype.py +++ b/iscc_sdk/mediatype.py @@ -1,6 +1,6 @@ """*Detect and map RFC6838 mediatypes to ISCC processing modes*.""" -from os.path import basename +from pathlib import Path from loguru import logger as log from typing import List, Optional, Union import mimetypes @@ -23,7 +23,7 @@ def mediatype_and_mode(fp): - # type: (str) -> tuple + # type: (str|Path) -> tuple[str, str] """ Detect mediatype and processing mode for a file. @@ -35,18 +35,18 @@ def mediatype_and_mode(fp): ``` - :param str fp: Filepath + :param fp: Filepath :return: A tuple of `mediatype` and `mode` - :rtype: tuple[str, str] """ + fp = Path(fp) with open(fp, "rb") as infile: data = infile.read(4096) - mediatype = mediatype_guess(data, file_name=basename(fp)) + mediatype = mediatype_guess(data, file_name=fp.name) try: mode = mediatype_to_mode(mediatype) except idk.IsccUnsupportedMediatype: - raise idk.IsccUnsupportedMediatype(f"Unsupported mediatype {mediatype} for {basename(fp)}") + raise idk.IsccUnsupportedMediatype(f"Unsupported mediatype {mediatype} for {fp.name}") return mediatype, mode diff --git a/iscc_sdk/metadata.py b/iscc_sdk/metadata.py index f76f628..481da75 100644 --- a/iscc_sdk/metadata.py +++ b/iscc_sdk/metadata.py @@ -1,5 +1,6 @@ """*Metadata handling functions*""" +from pathlib import Path from typing import Optional try: @@ -33,14 +34,14 @@ def extract_metadata(fp): - # type: (str) -> idk.IsccMeta + # type: (str|Path) -> idk.IsccMeta """ Extract metadata from file. - :param str fp: Filepath to media file. + :param fp: Filepath to media file. 
:return: Metadata mapped to IsccMeta schema - :rtype: IsccMeta """ + fp = Path(fp) mime, mode = idk.mediatype_and_mode(fp) extractor = EXTRACTORS.get(mode) if extractor: diff --git a/iscc_sdk/pdf.py b/iscc_sdk/pdf.py index 63ccf67..e9defd0 100644 --- a/iscc_sdk/pdf.py +++ b/iscc_sdk/pdf.py @@ -2,6 +2,8 @@ import shutil import tempfile +from pathlib import Path + from PIL import Image, ImageEnhance import fitz import iscc_sdk as idk @@ -13,14 +15,14 @@ def pdf_thumbnail(fp): - # type: (str) -> Image.Image + # type: (str|Path) -> Image.Image """ Create a thumbnail from PDF document. - :param str fp: Filepath to PDF document. + :param fp: Filepath to PDF document. :return: Thumbnail image as PIL Image object - :rtype: Image.Image """ + fp = Path(fp) with fitz.Document(fp) as doc: page = doc.load_page(0) pix = page.get_pixmap() @@ -32,15 +34,15 @@ def pdf_thumbnail(fp): def pdf_meta_embed(fp, meta): - # type: (str, idk.IsccMeta) -> str + # type: (str|Path, idk.IsccMeta) -> Path """ Embed metadata into a copy of the PDF file. - :param str fp: Filepath to source PDF file - :param IsccMeta meta: Metadata to embed into PDF + :param fp: Filepath to source PDF file + :param meta: Metadata to embed into PDF :return: Filepath to the new PDF file with updated metadata - :rtype: str """ + fp = Path(fp) tempdir = tempfile.mkdtemp() temppdf = shutil.copy(fp, tempdir) with fitz.Document(temppdf) as doc: @@ -71,4 +73,4 @@ def pdf_meta_embed(fp, meta): doc.xref_set_key(xref, "iscc_rights", fitz.get_pdf_str(meta.rights)) doc.set_metadata(new_meta) doc.saveIncr() - return temppdf + return Path(temppdf) diff --git a/iscc_sdk/text.py b/iscc_sdk/text.py index 641f1bc..c92f9fe 100644 --- a/iscc_sdk/text.py +++ b/iscc_sdk/text.py @@ -49,14 +49,14 @@ def text_meta_extract(fp): - # type: (str) -> dict + # type: (str|Path) -> dict """ Extract metadata from text document file. - :param str fp: Filepath to text document file. + :param fp: Filepath to text document file. 
:return: Metadata mapped to IsccMeta schema - :rtype: dict """ + fp = Path(fp) args = ["--metadata", "-j", "--encoding=UTF-8", fp] result = idk.run_tika(args) @@ -78,15 +78,15 @@ def text_meta_extract(fp): def text_meta_embed(fp, meta): - # type: (str, IsccMeta) -> Optional[str] + # type: (str|Path, IsccMeta) -> Path|None """ Embed metadata into a copy of the text document. - :param str fp: Filepath to source text document file - :param IsccMeta meta: Metadata to embed into text document + :param fp: Filepath to source text document file + :param meta: Metadata to embed into text document :return: Filepath to the new file with updated metadata (None if no embedding supported) - :rtype: str|None """ + fp = Path(fp) mt, _ = idk.mediatype_and_mode(fp) if mt == "application/pdf": return idk.pdf_meta_embed(fp, meta) @@ -97,20 +97,19 @@ def text_meta_embed(fp, meta): def text_extract(fp): - # type: (str) -> str + # type: (str|Path) -> str """ Extract plaintext from a text document. - :param st fp: Filepath to text document file. + :param fp: Filepath to text document file. :return: Extracted plaintext - :rtype: str """ - + fp = Path(fp) args = ["--text", "--encoding=UTF-8", fp] result = idk.run_tika(args) text = result.stdout.decode(encoding="UTF-8").strip() if not text: - raise idk.IsccExtractionError(f"No text extracted from {basename(fp)}") + raise idk.IsccExtractionError(f"No text extracted from {fp.name}") return result.stdout.decode(encoding="UTF-8") @@ -120,7 +119,7 @@ def text_features(text): Create granular fingerprint for text (minhashes over ngrams from cdc-chunks). Text should be normalized before extracting text features. - :param str text: Normalized plaintext. + :param text: Normalized plaintext. :returns dict: Dictionary with 'sizes' and 'features'. 
""" clean_text = ic.text_clean(text) @@ -158,9 +157,8 @@ def text_name_from_uri(uri): Extract `filename` part of an uri without file extension to be used as fallback title for an asset if no title information can be acquired. - :param str uri: Url or file path + :param uri: Url or file path :return: derived name (might be an empty string) - :rtype: str """ if isinstance(uri, Path): result = urlparse(uri.as_uri()) @@ -175,13 +173,12 @@ def text_name_from_uri(uri): def text_thumbnail(fp): - # type: (str) -> Optional[Image.Image] + # type: (str|Path) -> Image.Image|None """ Create a thumbnail for a text document. - :param str fp: Filepath to text document. + :param fp: Filepath to text document. :return: Thumbnail image as PIL Image object - :rtype: Image.Image|None """ mt, _ = idk.mediatype_and_mode(fp) if mt == "application/pdf": diff --git a/iscc_sdk/thumbnail.py b/iscc_sdk/thumbnail.py index 097122c..3d2a93b 100644 --- a/iscc_sdk/thumbnail.py +++ b/iscc_sdk/thumbnail.py @@ -1,6 +1,6 @@ """*Generate thumbnails for media assets*""" -from typing import Optional +from pathlib import Path from PIL import Image import iscc_sdk as idk @@ -17,14 +17,14 @@ def thumbnail(fp): - # type: (str) -> Optional[Image.Image] + # type: (str|Path) -> Image.Image|None """ Create a thumbnail for a media asset. - :param str fp: Filepath to media file. + :param fp: Filepath to media file. 
:return: Thumbnail image as PIL Image object - :rtype: Image.Image|None """ + fp = Path(fp) mime, mode = idk.mediatype_and_mode(fp) thumbnailer = THUMBNAILERS.get(mode) if thumbnailer: diff --git a/iscc_sdk/video.py b/iscc_sdk/video.py index 2ae92c5..a59faa5 100644 --- a/iscc_sdk/video.py +++ b/iscc_sdk/video.py @@ -6,7 +6,6 @@ import io import sys import tempfile -from os.path import join, basename from pathlib import Path from secrets import token_hex from PIL import Image, ImageEnhance @@ -47,14 +46,14 @@ def video_meta_extract(fp): - # type: (str) -> dict + # type: (str|Path) -> dict """ Extract metadata from video. - :param str fp: Filepath to video file + :param fp: Filepath to video file :return: Metdata mpped to IsccMeta schema - :rtype: dict """ + fp = Path(fp) args = ["-i", fp, "-movflags", "use_metadata_tags", "-f", "ffmetadata", "-"] @@ -92,18 +91,17 @@ def video_meta_extract(fp): def video_meta_embed(fp, meta): - # type: (str, idk.IsccMeta) -> str + # type: (str|Path, idk.IsccMeta) -> Path """ Embed metadata into a copy of the video file. 
Supported fields: name, description, meta, creator, license, aquire - :param str fp: Filepath to source video file - :param IsccMeta meta: Metadata to embed into video + :param fp: Filepath to source video file + :param meta: Metadata to embed into video :return: Filepath to new video file with updated metadata - :rtype: str """ - + fp = Path(fp) write_map = { "name": "iscc_name", "description": "iscc_description", @@ -135,9 +133,9 @@ def video_meta_embed(fp, meta): cmdf += f"{write_map[field]}={value}\n" # Create temp filepaths - tempdir = tempfile.mkdtemp() - metafile = join(tempdir, "meta.txt") - videofile = join(tempdir, basename(fp)) + tempdir = Path(tempfile.mkdtemp()) + metafile = tempdir / "meta.txt" + videofile = tempdir / fp.name # Store metadata with open(metafile, "wt", encoding="utf-8") as outf: @@ -163,14 +161,15 @@ def video_meta_embed(fp, meta): def video_thumbnail(fp): - # type: (str) -> Optional[Image.Image] + # type: (str|Path) -> Image.Image|None """ Create a thumbnail for a video. - :param str fp: Filepath to video file. + :param fp: Filepath to video file. :return: Raw PNG byte data :rtype: bytes """ + fp = Path(fp) size = idk.sdk_opts.image_thumbnail_size args = [ @@ -196,19 +195,19 @@ def video_thumbnail(fp): def video_features_extract(fp): - # type: (str) -> List[Tuple[int, ...]] + # type: (str|Path) -> List[Tuple[int, ...]] """ Extract video features. - :param str fp: Filepath to video file. + :param fp: Filepath to video file. :return: A sequence of frame signatures. - :rtype: Sequence[Tuple[int, ...]] """ + fp = Path(fp) # TODO use confidence value to improve simililarity hash. sig = video_mp7sig_extract(fp) if idk.sdk_opts.video_store_mp7sig: - outp = fp + ".iscc.mp7sig" + outp = fp.as_posix() + ".iscc.mp7sig" with open(outp, "wb") as outf: outf.write(sig) @@ -217,13 +216,13 @@ def video_features_extract(fp): def video_mp7sig_extract(fp): - # type: (str) -> bytes + # type: (str|Path) -> bytes """Extract MPEG-7 Video Signature. 
- :param str fp: Filepath to video file. + :param fp: Filepath to video file. :return: raw signature data - :rtype: bytes """ + fp = Path(fp) sigfile_path = Path(tempfile.mkdtemp(), token_hex(16) + ".bin") sigfile_path_escaped = sigfile_path.as_posix().replace(":", "\\\\:") @@ -241,13 +240,14 @@ def video_mp7sig_extract(fp): def video_mp7sig_extract_scenes(fp, scene_limit=None): + # type: (str|Path, float|None) -> tuple[bytes, list[float]] """Extract MPEG-7 Video Signature and Scenes. - :param str fp: Filepath to video file. - :param Optional[float] scene_limit: Threshold value above which a scene cut is created (0.4) - :return: raw signature data - :rtype: bytes + :param fp: Filepath to video file. + :param scene_limit: Threshold value above which a scene cut is created (0.4) + :return: tuple of raw signature data and list of scene cutpoints """ + fp = Path(fp) scene_limit = scene_limit or idk.sdk_opts.video_scene_limit @@ -296,11 +296,13 @@ def video_mp7sig_extract_scenes(fp, scene_limit=None): def video_parse_scenes(scene_text, scene_limit=None): - # type: (str) -> List[float] - """Parse scene score output from ffmpeg + # type: (str, float|None) -> List[float] + """ + Parse scene score output from ffmpeg - :param str scene_text: Scene score output from ffmpeg - :param Optional[float] scene_limit: Threshold value above which a scene cut is created (0.4) + :param scene_text: Scene score output from ffmpeg + :param scene_limit: Threshold value above which a scene cut is created (0.4) + :return: Scene cutpoints """ scene_limit = scene_limit or idk.sdk_opts.video_scene_limit @@ -331,8 +333,12 @@ def video_parse_scenes(scene_text, scene_limit=None): def video_compute_granular(frames, scenes): # type: (List[idk.Frame], List[float]) -> dict - """Compute video signatures for individual scenes in video. - Returns a dictionary conforming to `shema.Feature`- objects. + """ + Compute video signatures for individual scenes in video. + + :param frames: List of video frames. 
+ :param scenes: List of video scene cutpoints. + :return: A dictionary conforming to `schema.Feature` objects. """ features, sizes, segment = [], [], [] start_frame = 0