Skip to content

Commit

Permalink
♻️ split up sources module to accommodate future additions (#288)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianMindee authored Dec 9, 2024
1 parent 0c4c9e6 commit 90bdd2c
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 101 deletions.
15 changes: 6 additions & 9 deletions mindee/input/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from mindee.input.local_response import LocalResponse
from mindee.input.page_options import PageOptions
from mindee.input.sources import (
Base64Input,
BytesInput,
FileInput,
InputType,
LocalInputSource,
PathInput,
UrlInputSource,
)
from mindee.input.sources.base_64_input import Base64Input
from mindee.input.sources.bytes_input import BytesInput
from mindee.input.sources.file_input import FileInput
from mindee.input.sources.local_input_source import InputType, LocalInputSource
from mindee.input.sources.path_input import PathInput
from mindee.input.sources.url_input_source import UrlInputSource
from mindee.input.workflow_options import WorkflowOptions
6 changes: 6 additions & 0 deletions mindee/input/sources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mindee.input.sources.base_64_input import Base64Input
from mindee.input.sources.bytes_input import BytesInput
from mindee.input.sources.file_input import FileInput
from mindee.input.sources.local_input_source import InputType, LocalInputSource
from mindee.input.sources.path_input import PathInput
from mindee.input.sources.url_input_source import UrlInputSource
20 changes: 20 additions & 0 deletions mindee/input/sources/base_64_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import base64
import io

from mindee.input.sources.local_input_source import InputType, LocalInputSource


class Base64Input(LocalInputSource):
    """Input source built from base64-encoded text."""

    def __init__(self, base64_string: str, filename: str) -> None:
        """
        Decode a base64 string and hold the result in an in-memory buffer.

        :param base64_string: Document content as a base64 encoded string
        :param filename: Name to give to the decoded document
        """
        decoded_bytes = base64.standard_b64decode(base64_string)
        self.file_object = io.BytesIO(decoded_bytes)
        # The data only exists in memory, so there is a name but no path.
        self.filename, self.filepath = filename, None
        super().__init__(input_type=InputType.BASE64)
19 changes: 19 additions & 0 deletions mindee/input/sources/bytes_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import io

from mindee.input.sources.local_input_source import InputType, LocalInputSource


class BytesInput(LocalInputSource):
    """Input source wrapping raw bytes held in memory."""

    def __init__(self, raw_bytes: bytes, filename: str) -> None:
        """
        Wrap raw bytes in an in-memory buffer.

        :param raw_bytes: Document content as bytes
        :param filename: Name to give to the document
        """
        buffer = io.BytesIO(raw_bytes)
        self.file_object = buffer
        # The data only exists in memory, so there is a name but no path.
        self.filename, self.filepath = filename, None
        super().__init__(input_type=InputType.BYTES)
23 changes: 23 additions & 0 deletions mindee/input/sources/file_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
from typing import BinaryIO

from mindee.input.sources.local_input_source import InputType, LocalInputSource


class FileInput(LocalInputSource):
    """A binary file input."""

    def __init__(self, file: BinaryIO) -> None:
        """
        Input document from a Python binary file object.

        Note: the calling function is responsible for closing the file.

        :param file: Binary file object; its ``name`` must be non-empty
        :raises AssertionError: if ``file.name`` is empty
        """
        # Raise explicitly rather than with an ``assert`` statement: asserts are
        # stripped when Python runs with -O, which would let a nameless file in.
        if not file.name:
            raise AssertionError("File name must be set")

        self.file_object = file
        self.filename = os.path.basename(file.name)
        self.filepath = file.name
        super().__init__(input_type=InputType.FILE)
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import base64
import io
import mimetypes
import os
import tempfile
from enum import Enum
from pathlib import Path
from typing import BinaryIO, Optional, Sequence, Tuple, Union
from typing import BinaryIO, Optional, Sequence, Tuple

import pypdfium2 as pdfium

Expand Down Expand Up @@ -205,91 +202,3 @@ def read_contents(self, close_file: bool) -> Tuple[str, bytes]:
def close(self) -> None:
    """Close the file object held by this input source."""
    stream = self.file_object
    stream.close()


class FileInput(LocalInputSource):
    """A binary file input."""

    def __init__(self, file: BinaryIO) -> None:
        """
        Input document from a Python binary file object.

        Note: the calling function is responsible for closing the file.

        :param file: Binary file object; its ``name`` must be non-empty
        :raises AssertionError: if ``file.name`` is empty
        """
        # Raise explicitly rather than with an ``assert`` statement: asserts are
        # stripped when Python runs with -O, which would let a nameless file in.
        if not file.name:
            raise AssertionError("File name must be set")

        self.file_object = file
        self.filename = os.path.basename(file.name)
        self.filepath = file.name
        super().__init__(input_type=InputType.FILE)


class PathInput(LocalInputSource):
    """A local path input."""

    def __init__(self, filepath: Union[Path, str]) -> None:
        """
        Input document from a path.

        The file is opened in binary mode and is left open once construction
        succeeds.

        :param filepath: Path to open
        """
        self.file_object = open(filepath, "rb")  # pylint: disable=consider-using-with
        self.filename = os.path.basename(filepath)
        self.filepath = str(filepath)
        try:
            super().__init__(input_type=InputType.PATH)
        except Exception:
            # If the base initialisation fails the caller never receives the
            # instance, so close the handle here instead of leaking it.
            self.file_object.close()
            raise


class BytesInput(LocalInputSource):
    """Input source wrapping raw bytes held in memory."""

    def __init__(self, raw_bytes: bytes, filename: str) -> None:
        """
        Wrap raw bytes in an in-memory buffer.

        :param raw_bytes: Document content as bytes
        :param filename: Name to give to the document
        """
        buffer = io.BytesIO(raw_bytes)
        self.file_object = buffer
        # The data only exists in memory, so there is a name but no path.
        self.filename, self.filepath = filename, None
        super().__init__(input_type=InputType.BYTES)


class Base64Input(LocalInputSource):
    """Input source built from base64-encoded text."""

    def __init__(self, base64_string: str, filename: str) -> None:
        """
        Decode a base64 string and hold the result in an in-memory buffer.

        :param base64_string: Document content as a base64 encoded string
        :param filename: Name to give to the decoded document
        """
        decoded_bytes = base64.standard_b64decode(base64_string)
        self.file_object = io.BytesIO(decoded_bytes)
        # The data only exists in memory, so there is a name but no path.
        self.filename, self.filepath = filename, None
        super().__init__(input_type=InputType.BASE64)


class UrlInputSource:
    """A local or distant URL input."""

    url: str
    """The Uniform Resource Locator."""

    def __init__(self, url: str) -> None:
        """
        Input document from a URL.

        :param url: URL to send, must be HTTPS
        :raises MindeeSourceError: if the URL does not start with ``https://``
        """
        # Check the scheme together with its separator: a bare "https" prefix
        # would also accept strings such as "httpsfoo://host".
        if not url.lower().startswith("https://"):
            raise MindeeSourceError("URL must be HTTPS")

        self.input_type = InputType.URL

        logger.debug("URL input: %s", url)

        self.url = url
20 changes: 20 additions & 0 deletions mindee/input/sources/path_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
from pathlib import Path
from typing import Union

from mindee.input.sources.local_input_source import InputType, LocalInputSource


class PathInput(LocalInputSource):
    """A local path input."""

    def __init__(self, filepath: Union[Path, str]) -> None:
        """
        Input document from a path.

        The file is opened in binary mode and is left open once construction
        succeeds.

        :param filepath: Path to open
        """
        self.file_object = open(filepath, "rb")  # pylint: disable=consider-using-with
        self.filename = os.path.basename(filepath)
        self.filepath = str(filepath)
        try:
            super().__init__(input_type=InputType.PATH)
        except Exception:
            # If the base initialisation fails the caller never receives the
            # instance, so close the handle here instead of leaking it.
            self.file_object.close()
            raise
25 changes: 25 additions & 0 deletions mindee/input/sources/url_input_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from mindee.error.mindee_error import MindeeSourceError
from mindee.input.sources.local_input_source import InputType
from mindee.logger import logger


class UrlInputSource:
    """A local or distant URL input."""

    url: str
    """The Uniform Resource Locator."""

    def __init__(self, url: str) -> None:
        """
        Input document from a URL.

        :param url: URL to send, must be HTTPS
        :raises MindeeSourceError: if the URL does not start with ``https://``
        """
        # Check the scheme together with its separator: a bare "https" prefix
        # would also accept strings such as "httpsfoo://host".
        if not url.lower().startswith("https://"):
            raise MindeeSourceError("URL must be HTTPS")

        self.input_type = InputType.URL

        logger.debug("URL input: %s", url)

        self.url = url

0 comments on commit 90bdd2c

Please sign in to comment.